**PREPARE ENVIRONMENT**
- Due to changes in some dependencies, you will be prompted to restart the session after all the required libraries are installed. Simply restart and run this section again.

In [None]:
# @title
# Install the third-party packages this notebook imports (IPython shell escape).
!pip install diffusers mediapipe transformers huggingface-hub omegaconf einops opencv-python face-alignment decord ffmpeg-python safetensors soundfile

import os
import subprocess

# Clone the LatentSync repository only when no "LatentSync" directory exists in
# the current working directory, then switch into it so that the relative paths
# used further down ("checkpoints", "configs") resolve inside the checkout.
# NOTE(review): running this cell twice without restarting leaves the working
# directory inside LatentSync, so the existence check would look for a nested
# "LatentSync" folder - confirm the restart-then-rerun workflow is always used.
if not os.path.exists("LatentSync"):
    !git clone https://github.com/Isi-dev/LatentSync
%cd LatentSync

from google.colab import files
import torch
from omegaconf import OmegaConf
from diffusers import AutoencoderKL, DDIMScheduler
from latentsync.models.unet import UNet3DConditionModel
from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
from latentsync.whisper.audio2feature import Audio2Feature
from diffusers.utils.import_utils import is_xformers_available
from accelerate.utils import set_seed
import ipywidgets as widgets

# Directories that receive the downloaded files listed in ``model_urls``.
os.makedirs("/root/.cache/torch/hub/checkpoints", exist_ok=True)
os.makedirs("checkpoints", exist_ok=True)

# Destination path -> download URL for every file fetched during setup.
model_urls = {
    "/root/.cache/torch/hub/checkpoints/s3fd-619a316812.pth":
        "https://huggingface.co/Isi99999/LatentSync/resolve/main/auxiliary/s3fd-619a316812.pth",
    "/root/.cache/torch/hub/checkpoints/2DFAN4-cd938726ad.zip":
        "https://huggingface.co/Isi99999/LatentSync/resolve/main/auxiliary/2DFAN4-cd938726ad.zip",
    "checkpoints/latentsync_unet.pt":
        "https://huggingface.co/Isi99999/LatentSync/resolve/main/latentsync_unet.pt",
    "checkpoints/tiny.pt":
        "https://huggingface.co/Isi99999/LatentSync/resolve/main/whisper/tiny.pt",
    "checkpoints/diffusion_pytorch_model.safetensors":
        "https://huggingface.co/stabilityai/sd-vae-ft-mse/resolve/main/diffusion_pytorch_model.safetensors",
    "checkpoints/config.json":
        "https://huggingface.co/stabilityai/sd-vae-ft-mse/raw/main/config.json",
}

for file_path, url in model_urls.items():
    if os.path.exists(file_path):
        print(f"File {file_path} already exists. Skipping download.")
        continue

    print(f"Downloading {file_path} ...")
    # ``wget -O`` creates the destination before any data arrives, so a failed
    # or interrupted transfer would leave a truncated file that the existence
    # check above accepts on the next run. Download under a temporary name and
    # move it into place only after wget has exited successfully.
    partial_path = f"{file_path}.part"
    try:
        subprocess.run(["wget", url, "-O", partial_path], check=True)
        os.replace(partial_path, file_path)
    finally:
        # Only present here when the download failed; never leave it behind.
        if os.path.exists(partial_path):
            os.remove(partial_path)

print("Setup complete.")

def perform_inference(video_path, audio_path, seed=1247, num_inference_steps=20, guidance_scale=1.0, output_path="output_video.mp4"):
    """Run the LatentSync lip-sync pipeline once on a video/audio pair.

    The scheduler, audio encoder, VAE and UNet are loaded from the local
    ``configs`` and ``checkpoints`` directories on every call, assembled into a
    ``LipsyncPipeline``, moved to the CUDA device and executed.

    Args:
        video_path: Path of the input video, forwarded to the pipeline.
        audio_path: Path of the driving audio, forwarded to the pipeline.
        seed: Value handed to ``set_seed`` before the pipeline runs.
        num_inference_steps: Forwarded to the pipeline unchanged.
        guidance_scale: Forwarded to the pipeline unchanged.
        output_path: Destination passed as ``video_out_path``. The mask video
            path is derived from it by inserting ``_mask`` before the file
            extension.

    Returns:
        ``output_path``, unchanged.
    """
    config_path = "configs/unet/first_stage.yaml"
    inference_ckpt_path = "checkpoints/latentsync_unet.pt"

    config = OmegaConf.load(config_path)

    # Half precision is selected only when CUDA is available and the device's
    # major compute capability is greater than 7; otherwise float32 is used.
    is_fp16_supported = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] > 7
    dtype = torch.float16 if is_fp16_supported else torch.float32

    scheduler = DDIMScheduler.from_pretrained("configs")

    whisper_model_path = "checkpoints/tiny.pt"
    audio_encoder = Audio2Feature(model_path=whisper_model_path, device="cuda", num_frames=config.data.num_frames)

    vae = AutoencoderKL.from_pretrained("checkpoints", torch_dtype=dtype, local_files_only=True)
    vae.config.scaling_factor = 0.18215
    vae.config.shift_factor = 0

    # The UNet is loaded on the CPU first and cast to the working dtype; the
    # whole pipeline is moved to CUDA below.
    unet, _ = UNet3DConditionModel.from_pretrained(
        OmegaConf.to_container(config.model),
        inference_ckpt_path,
        device="cpu",
    )

    unet = unet.to(dtype=dtype)

    if is_xformers_available():
        unet.enable_xformers_memory_efficient_attention()
        print('x_formers available!')

    pipeline = LipsyncPipeline(
        vae=vae,
        audio_encoder=audio_encoder,
        unet=unet,
        scheduler=scheduler,
    ).to("cuda")

    set_seed(seed)

    # Build the mask path from the extension only. A plain
    # ``str.replace(".mp4", ...)`` rewrites every occurrence in the path (for
    # example inside a directory name) and yields the output path itself when
    # the extension is not ".mp4".
    output_root, output_ext = os.path.splitext(output_path)
    video_mask_path = f"{output_root}_mask{output_ext}"

    pipeline(
        video_path=video_path,
        audio_path=audio_path,
        video_out_path=output_path,
        video_mask_path=video_mask_path,
        num_frames=config.data.num_frames,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        weight_dtype=dtype,
        width=config.data.resolution,
        height=config.data.resolution,
    )
    return output_path


**RUN IMAGE TO VIDEO**

In [None]:
# @title
import cv2
import torchaudio
import subprocess

if torch.cuda.is_available():
    import gc

    # Collect unreachable Python objects first so any CUDA tensors they still
    # hold are freed before the caching allocator is asked to give back its
    # unused blocks.
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

# Input widgets for the image-to-video cell. Their values are read by
# on_run_button_click when the button is pressed.
image_upload = widgets.FileUpload(accept="image/*", multiple=False, description="Upload Image")
audio_upload = widgets.FileUpload(accept=".wav,.mp3,.aac,.flac", multiple=False, description="Upload Audio")
seed_input = widgets.IntText(value=1247, description="Seed:")
num_steps_input = widgets.IntSlider(value=20, min=1, max=100, step=1, description="Steps:")
guidance_scale_input = widgets.FloatSlider(value=1.0, min=0.1, max=10.0, step=0.1, description="Guidance Scale:")
# Factor applied to the source image's width/height when the result is displayed.
video_scale_input = widgets.FloatSlider(value=0.5, min=0.1, max=1.0, step=0.1, description="Video Scale:")
# Frame rate the generated video is converted to before it is displayed.
output_fps_input = widgets.IntSlider(value=25, min=6, max=60, step=1, description="Output FPS:")

# Trigger button and the output area the click handler prints into.
run_button = widgets.Button(description="Run Inference")
output_display = widgets.Output()

def convert_video_fps(input_path, target_fps):
    """Re-encode ``input_path`` at ``target_fps`` with ffmpeg.

    The result is written to ``converted_<target_fps>fps.mp4`` in the current
    directory. Audio is re-encoded to AAC when ffprobe reports an audio stream
    and dropped otherwise.

    Returns:
        The output path, or ``None`` when the input file is missing or empty.

    Raises:
        subprocess.CalledProcessError: If the ffmpeg command fails.
    """
    source_missing = not os.path.exists(input_path)
    if source_missing or os.path.getsize(input_path) == 0:
        print(f"Error: The video file {input_path} is missing or empty.")
        return None

    # ffprobe prints stream information only when an audio stream is present.
    probe = subprocess.run(
        [
            "ffprobe", "-i", input_path, "-show_streams", "-select_streams", "a",
            "-loglevel", "error"
        ],
        capture_output=True,
        text=True,
    )
    if probe.stdout.strip() != "":
        audio_args = ["-c:a", "aac", "-b:a", "192k"]
    else:
        audio_args = ["-an"]

    output_path = f"converted_{target_fps}fps.mp4"
    ffmpeg_cmd = [
        "ffmpeg", "-y", "-i", input_path,
        "-filter:v", f"fps={target_fps}",
        "-c:v", "libx264", "-preset", "fast", "-crf", "18",
        *audio_args,
        output_path,
    ]

    subprocess.run(ffmpeg_cmd, check=True)
    print(f"Converted video saved as {output_path}")
    return output_path

# def add_silent_frames(audio_path, target_fps=25):

#     waveform, sample_rate = torchaudio.load(audio_path)
#     silent_duration = 25 / target_fps  # Two frames at target FPS
#     silent_samples = int(silent_duration * sample_rate)
#     silent_waveform = torch.zeros((waveform.shape[0], silent_samples))

#     # Concatenate silence at the beginning for mouth correction
#     new_waveform = torch.cat((silent_waveform, waveform), dim=1)
#     new_audio_path = "audio_with_silence.wav"
#     torchaudio.save(new_audio_path, new_waveform, sample_rate)

#     return new_audio_path




def pad_audio_to_multiple_of_16(audio_path, target_fps=25):
    """Pad an audio file with trailing silence so it spans a multiple of 16 frames.

    The frame count is the number of whole frames of ``1 / target_fps`` seconds
    that fit in the audio. When that count is not a multiple of 16, the
    waveform is extended with zeros up to the next multiple and saved as
    ``padded_audio.wav``; otherwise the input file is used unchanged.

    Args:
        audio_path: Path of an audio file readable by ``torchaudio.load``.
        target_fps: Video frame rate used to convert samples to frames.

    Returns:
        ``(path, num_frames)``: the file to use (the padded copy or the
        original path) and the number of frames it spans.
    """
    waveform, sample_rate = torchaudio.load(audio_path)
    num_samples = waveform.shape[1]

    # Multiply before dividing: with integer inputs this is exact, whereas
    # ``int(num_samples / sample_rate * target_fps)`` can land just below a
    # whole number (1.16 * 25 evaluates to 28.999999999999996) and drop a
    # frame, which then breaks the multiple-of-16 guarantee.
    num_frames = int(num_samples * target_fps // sample_rate)

    remainder = num_frames % 16
    if remainder == 0:
        return audio_path, num_frames  # No padding needed

    target_frames = num_frames + (16 - remainder)
    # Ceiling division: the smallest sample count that still covers
    # ``target_frames`` whole frames, so the padded audio yields exactly that
    # many frames even when sample_rate / target_fps is not an integer.
    target_samples = int(-(-(target_frames * sample_rate) // target_fps))
    silence = torch.zeros((waveform.shape[0], target_samples - num_samples))
    waveform = torch.cat((waveform, silence), dim=1)

    padded_audio_path = "padded_audio.wav"
    torchaudio.save(padded_audio_path, waveform, sample_rate)
    return padded_audio_path, target_frames



def create_video_from_image(image_path, output_video_path, num_frames, fps=25):
    """Write a video that repeats one still image for ``num_frames`` frames.

    Args:
        image_path: Image file read with ``cv2.imread``.
        output_video_path: Destination of the ``mp4v``-encoded video.
        num_frames: Number of identical frames to write.
        fps: Frame rate given to the video writer.

    Returns:
        ``output_video_path`` on success, or ``None`` when the image cannot be
        read or the video writer cannot be opened.
    """
    img = cv2.imread(image_path)
    if img is None:
        print("Error: Unable to read the image.")
        return None

    height, width = img.shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

    # A writer that failed to open ignores write() calls silently, which would
    # leave an unusable file behind and report success.
    if not video_writer.isOpened():
        video_writer.release()
        print(f"Error: Unable to open video writer for {output_video_path}.")
        return None

    try:
        for _ in range(num_frames):
            video_writer.write(img)
    finally:
        video_writer.release()

    print(f"Created video {output_video_path} with {num_frames} frames ({num_frames / fps:.2f} seconds).")
    return output_video_path


def on_run_button_click(change):
    """Button callback: animate the uploaded image with the uploaded audio.

    Saves both uploads to disk, pads the audio to a multiple of 16 frames,
    builds a still-image video of matching length, runs ``perform_inference``,
    converts the result to the selected output frame rate and displays it.
    All output is printed into ``output_display``.

    Args:
        change: Argument supplied by the button's ``on_click`` hook; unused.
    """
    with output_display:
        output_display.clear_output()

        # Validate uploads
        if not audio_upload.value or not image_upload.value:
            print("Please upload both an image and an audio file.")
            return

        # Every temporary file written below is recorded here so the
        # ``finally`` clause removes it no matter where processing stops.
        temp_files = []
        try:
            # Process audio
            audio_file_info = next(iter(audio_upload.value.values()))
            uploaded_audio_path = audio_file_info.get('name', 'uploaded_audio.wav')
            with open(uploaded_audio_path, "wb") as f:
                f.write(audio_file_info['content'])
            temp_files.append(uploaded_audio_path)

            # Padding may return a new file; keep both paths for cleanup.
            audio_path, num_frames = pad_audio_to_multiple_of_16(uploaded_audio_path, target_fps=25)
            temp_files.append(audio_path)

            # Process image and create video
            image_file_info = next(iter(image_upload.value.values()))
            image_path = image_file_info.get('name', 'uploaded_image.png')
            with open(image_path, "wb") as f:
                f.write(image_file_info['content'])
            temp_files.append(image_path)

            img = cv2.imread(image_path)
            if img is None:
                print("Error: Could not read the image file.")
                return

            height, width, _ = img.shape
            video_path = create_video_from_image(image_path, "generated_video.mp4", num_frames)
            if video_path is None:
                print("Error: Could not create the input video from the image.")
                return
            temp_files.append(video_path)

            print("Running inference...")
            output_path = "output_video.mp4"
            perform_inference(video_path, audio_path, seed_input.value, num_steps_input.value, guidance_scale_input.value, output_path)

            # convert_video_fps reports the problem itself and returns None
            # when the inference output is missing or empty.
            output_path = convert_video_fps(output_path, output_fps_input.value)
            if output_path is None:
                return

            from IPython.display import Video
            print("Inference complete. Displaying output video:")
            display(Video(output_path, embed=True, width=int(width * video_scale_input.value), height=int(height * video_scale_input.value)))

        finally:
            torch.cuda.empty_cache()
            # dict.fromkeys drops duplicates (the padded path can equal the
            # uploaded path) while keeping the order.
            for path in dict.fromkeys(temp_files):
                if path and os.path.exists(path):
                    os.remove(path)

# Run on_run_button_click whenever the button is pressed.
run_button.on_click(on_run_button_click)

# Display the UI: inputs first, then the button, then the output area the
# handler prints into.
widgets_box = widgets.VBox([
    image_upload, audio_upload,
    seed_input, num_steps_input, guidance_scale_input, video_scale_input,
    output_fps_input, run_button, output_display
])
display(widgets_box)


**RUN VIDEO TO VIDEO**
- If the input video is shorter than the audio, it will be looped to match the audio's length. To make the video play in reverse order after reaching the end, check the `loop_vid_from_endframe` box. If the input video is already a seamless loop, leave this box unchecked.

In [None]:
# @title
import gc
import os
import subprocess
from datetime import datetime

import ffmpeg
import ipywidgets as widgets
import torch
import torchaudio

# Free GPU memory left over from a previous run. The imports come first so the
# cell does not rely on ``torch`` already being in the namespace, and garbage
# is collected before the allocator cache is emptied so tensors held only by
# unreachable objects are released as well.
if torch.cuda.is_available():
    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

loop_vid_from_endframe = True # @param {"type":"boolean"}

def convert_video_fps(input_path, target_fps):
    """Convert a video to ``target_fps`` and return the path of the new file.

    The converted video is saved as ``converted_<target_fps>fps.mp4``. When
    ffprobe finds an audio stream it is re-encoded to AAC; otherwise the output
    is written without audio.

    Returns:
        The converted file's path, or ``None`` if ``input_path`` does not exist
        or is empty.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    if not (os.path.exists(input_path) and os.path.getsize(input_path) != 0):
        print(f"Error: The video file {input_path} is missing or empty.")
        return None

    converted_path = f"converted_{target_fps}fps.mp4"

    # Any text on ffprobe's stdout means at least one audio stream was listed.
    ffprobe_result = subprocess.run(
        ["ffprobe", "-i", input_path, "-show_streams", "-select_streams", "a", "-loglevel", "error"],
        capture_output=True,
        text=True,
    )
    has_audio_stream = ffprobe_result.stdout.strip() != ""

    ffmpeg_args = ["ffmpeg", "-y", "-i", input_path]
    ffmpeg_args += ["-filter:v", f"fps={target_fps}"]
    ffmpeg_args += ["-c:v", "libx264", "-preset", "fast", "-crf", "18"]
    ffmpeg_args += ["-c:a", "aac", "-b:a", "192k"] if has_audio_stream else ["-an"]
    ffmpeg_args.append(converted_path)

    subprocess.run(ffmpeg_args, check=True)
    print(f"Converted video saved as {converted_path}")
    return converted_path


def trim_video(video_path, target_duration):
    """Cut a video down to its first ``target_duration`` seconds.

    NOTE: a second ``trim_video`` defined further down in this cell replaces
    this definition once the whole cell has run.

    Args:
        video_path: Path of the video to trim.
        target_duration: End time in seconds passed to ffmpeg as ``to``.

    Returns:
        ``"trimmed_video.mp4"`` on success; the unchanged ``video_path`` when
        the input is missing or empty, or when probing or trimming fails.
    """
    if not os.path.exists(video_path) or os.path.getsize(video_path) == 0:
        print(f"Error: The video file {video_path} is missing or empty.")
        return video_path

    try:
        probe = ffmpeg.probe(video_path, v='error', select_streams='a:0', show_entries='stream=codec_type')
        audio_present = any(stream['codec_type'] == 'audio' for stream in probe['streams'])
    except ffmpeg.Error as e:
        print(f"Error while probing video: {e}")
        return video_path

    trimmed_video_path = "trimmed_video.mp4"
    output_kwargs = {"codec": "libx264"}
    if audio_present:
        output_kwargs["audio_codec"] = "aac"

    try:
        # overwrite_output=True: the fixed output name is reused on every run,
        # and without it ffmpeg refuses to replace a file left by an earlier
        # run, so the untrimmed video would be returned.
        (
            ffmpeg.input(video_path, ss=0, to=target_duration)
            .output(trimmed_video_path, **output_kwargs)
            .run(overwrite_output=True)
        )
        print("Video trimmed")
    except ffmpeg.Error as e:
        print(f"Error during video trimming: {e}")
        return video_path

    return trimmed_video_path


def has_audio(video_path):
    """Return True when probing ``video_path`` lists at least one audio stream.

    An ``ffmpeg.Error`` raised by the probe is treated as "no audio".
    """
    try:
        audio_info = ffmpeg.probe(video_path, v='error', select_streams='a', show_entries='stream=index')
        stream_count = len(audio_info['streams'])
    except ffmpeg.Error:
        return False
    return stream_count > 0

def extend_video(video_path, target_duration):
    """Lengthen a video by concatenating it with further copies of itself.

    Clips are appended until their summed duration reaches
    ``target_duration``. With the module-level ``loop_vid_from_endframe`` flag
    set, each appended clip is the reverse of the previous one; otherwise the
    same clip is repeated. The result is written to ``extended_video.mp4``.

    NOTE: a second ``extend_video`` defined further down in this cell replaces
    this definition once the whole cell has run.

    Args:
        video_path: Path of the source video.
        target_duration: Minimum duration, in seconds, to reach.

    Returns:
        ``"extended_video.mp4"`` on success; the unchanged ``video_path`` when
        the input is missing or empty, its duration cannot be read or is not
        positive, or concatenation fails.
    """
    if not os.path.exists(video_path) or os.path.getsize(video_path) == 0:
        print(f"Error: The video file {video_path} is missing or empty.")
        return video_path

    audio_exists = has_audio(video_path)

    try:
        probe = ffmpeg.probe(video_path, v='error', select_streams='v:0', show_entries='format=duration')
        original_duration = float(probe['format']['duration'])
    except ffmpeg.Error as e:
        error_message = e.stderr.decode() if e.stderr else "No error message available"
        print(f"Error: Unable to fetch video duration: {error_message}")
        return video_path

    if original_duration <= 0:
        print("Error: Invalid video duration!")
        return video_path

    print("Extending video...")

    clips = [video_path]
    total_duration = original_duration
    extensions = 0

    while total_duration < target_duration:
        extensions += 1
        if loop_vid_from_endframe:
            clips.append(reverse_video(clips[-1], audio_exists))
        else:
            clips.append(clips[-1])
        total_duration += original_duration

    print(f"The video was extended {extensions} time(s)")

    extended_video_path = "extended_video.mp4"
    result_path = extended_video_path

    try:
        inputs = [ffmpeg.input(clip) for clip in clips]

        if audio_exists:
            concat = ffmpeg.concat(*inputs, v=1, a=1).output(extended_video_path, codec="libx264", audio_codec="aac", format="mp4", vcodec="libx264", acodec="aac")
        else:
            concat = ffmpeg.concat(*inputs, v=1, a=0).output(extended_video_path, codec="libx264", format="mp4", vcodec="libx264")

        concat.run(overwrite_output=True)
    except ffmpeg.Error as e:
        error_message = e.stderr.decode() if e.stderr else "No error message available"
        print(f"Error during video concatenation: {error_message}")
        result_path = video_path
    finally:
        # Remove only the intermediate clips. ``clips[1:]`` can contain the
        # source path itself (repeat mode, or a failed reversal), and the
        # caller's file must survive - especially when it is what we return.
        for clip in dict.fromkeys(clips[1:]):
            if clip != video_path and os.path.exists(clip):
                os.remove(clip)

    return result_path


def reverse_video(video_path, audio_exists):
    """Write a time-reversed copy of ``video_path`` and return its path.

    The copy is named ``reversed_<basename>`` in the current directory. The
    audio track is reversed too when ``audio_exists`` is true. If ffmpeg
    fails, the error is printed and the original ``video_path`` is returned.
    """
    reversed_video_path = f"reversed_{os.path.basename(video_path)}"

    filter_kwargs = {"vf": "reverse"}
    if audio_exists:
        filter_kwargs["af"] = "areverse"

    try:
        ffmpeg.input(video_path).output(reversed_video_path, **filter_kwargs).run(overwrite_output=True)
    except ffmpeg.Error as e:
        error_message = e.stderr.decode() if e.stderr else "No error message available"
        print(f"Error during video reversal: {error_message}")
        return video_path

    return reversed_video_path


def get_video_duration(video_path):
    """Return the container duration of ``video_path`` in seconds.

    When the probe raises ``ffmpeg.Error`` the problem is printed and 0 is
    returned.
    """
    try:
        info = ffmpeg.probe(video_path, v='error', select_streams='v:0', show_entries='format=duration')
        seconds = float(info['format']['duration'])
    except ffmpeg.Error as e:
        print(f"Error: Unable to fetch video duration for {video_path}: {e}")
        return 0
    return seconds


def pad_audio_to_multiple_of_16(audio_path, target_fps=25):
    """Pad an audio file with trailing silence so it spans a multiple of 16 frames.

    The frame count is the number of whole frames of ``1 / target_fps`` seconds
    that fit in the audio. When that count is not a multiple of 16, the
    waveform is extended with zeros up to the next multiple and saved as
    ``padded_audio.wav``; otherwise the input file is used unchanged.

    Args:
        audio_path: Path of an audio file readable by ``torchaudio.load``.
        target_fps: Video frame rate used to convert samples to frames.

    Returns:
        ``(path, num_frames, duration)``: the file to use (the padded copy or
        the original path), the number of frames it spans, and its duration
        in seconds.
    """
    waveform, sample_rate = torchaudio.load(audio_path)
    num_samples = waveform.shape[1]

    # Multiply before dividing: with integer inputs this is exact, whereas
    # ``int(num_samples / sample_rate * target_fps)`` can land just below a
    # whole number (1.16 * 25 evaluates to 28.999999999999996) and drop a
    # frame, which then breaks the multiple-of-16 guarantee.
    num_frames = int(num_samples * target_fps // sample_rate)

    remainder = num_frames % 16
    if remainder == 0:
        return audio_path, num_frames, num_samples / sample_rate

    target_frames = num_frames + (16 - remainder)
    # Ceiling division: the smallest sample count that still covers
    # ``target_frames`` whole frames, so the padded audio yields exactly that
    # many frames even when sample_rate / target_fps is not an integer.
    target_samples = int(-(-(target_frames * sample_rate) // target_fps))
    silence = torch.zeros((waveform.shape[0], target_samples - num_samples))
    waveform = torch.cat((waveform, silence), dim=1)

    padded_audio_path = "padded_audio.wav"
    torchaudio.save(padded_audio_path, waveform, sample_rate)
    return padded_audio_path, target_frames, waveform.shape[1] / sample_rate








# Rewriting some functions

def trim_video(video_path, target_duration):
    """Re-encode the first ``target_duration`` seconds of a video into a new file.

    The output is named ``trimmed_<timestamp>.mp4``. Whenever trimming is not
    possible or not needed (bad input, probe failure, source not longer than
    the target, ffmpeg failure, missing or empty output) the original
    ``video_path`` is returned instead and a message is printed.
    """
    # --- Input validation -------------------------------------------------
    if not os.path.exists(video_path):
        rejection = f"Error: Video file not found at {video_path}"
    elif os.path.getsize(video_path) == 0:
        rejection = f"Error: Video file is empty at {video_path}"
    elif target_duration <= 0:
        rejection = f"Error: Invalid target duration {target_duration}"
    else:
        rejection = None
    if rejection is not None:
        print(rejection)
        return video_path

    # --- Source duration: nothing to do unless it exceeds the target -------
    try:
        duration_info = ffmpeg.probe(video_path, v='error', show_entries='format=duration')
        source_seconds = float(duration_info['format']['duration'])
        if source_seconds <= 0:
            print("Error: Could not determine valid video duration")
            return video_path

        print(f"Original duration: {source_seconds:.2f}s, Target duration: {target_duration:.2f}s")

        if source_seconds <= target_duration:
            print("Video is already shorter than target duration, no trimming needed")
            return video_path
    except Exception as e:
        print(f"Error probing video duration: {str(e)}")
        return video_path

    # --- Audio detection (a failure here only produces a warning) ----------
    audio_present = False
    try:
        audio_info = ffmpeg.probe(
            video_path,
            v='error',
            select_streams='a',
            show_entries='stream=codec_type,codec_name'
        )
        audio_present = any(entry['codec_type'] == 'audio' for entry in audio_info.get('streams', []))
        if audio_present:
            first_codec = audio_info['streams'][0]['codec_name']
            print(f"Detected audio stream with codec: {first_codec}")
    except Exception as e:
        print(f"Warning: Could not determine audio status: {str(e)}")

    # Timestamped name so repeated runs do not collide.
    trimmed_video_path = "trimmed_" + datetime.now().strftime("%Y%m%d_%H%M%S") + ".mp4"

    # --- Encode -------------------------------------------------------------
    try:
        encode_options = {
            'c:v': 'libx264',
            'preset': 'fast',
            'crf': '18',
            'pix_fmt': 'yuv420p',
            'movflags': '+faststart',
        }
        if audio_present:
            encode_options.update({'c:a': 'aac', 'b:a': '192k', 'ar': '44100', 'ac': '2'})

        # Compile the ffmpeg-python graph to an argument list and run it via
        # subprocess so stderr is available as text on failure.
        ffmpeg_cmd = (
            ffmpeg.input(video_path, ss=0, to=target_duration)
            .output(trimmed_video_path, **encode_options)
            .compile()
        )
        subprocess.run(
            ffmpeg_cmd,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        print("Video trimmed successfully")
    except subprocess.CalledProcessError as e:
        print(f"FFmpeg trimming failed with error:\n{e.stderr}")
        # Drop a partially written output, if any.
        if os.path.exists(trimmed_video_path):
            try:
                os.remove(trimmed_video_path)
            except Exception as clean_err:
                print(f"Warning: Could not clean up failed output: {str(clean_err)}")
        return video_path
    except Exception as e:
        print(f"Unexpected error during trimming: {str(e)}")
        return video_path

    # --- Output verification -------------------------------------------------
    if not os.path.exists(trimmed_video_path):
        print("Error: Trimmed video file was not created")
        return video_path

    if os.path.getsize(trimmed_video_path) == 0:
        print("Error: Trimmed video file is empty")
        os.remove(trimmed_video_path)
        return video_path

    try:
        trimmed_seconds = float(ffmpeg.probe(trimmed_video_path)['format']['duration'])
        # Only warn when the result is more than half a second off target.
        if abs(trimmed_seconds - target_duration) > 0.5:
            print(f"Warning: Trimmed duration is {trimmed_seconds:.2f}s (target: {target_duration:.2f}s)")
    except Exception as e:
        print(f"Warning: Could not verify output duration: {str(e)}")

    return trimmed_video_path

def extend_video(video_path, target_duration):
    """Lengthen a video to at least ``target_duration`` seconds.

    Clips are appended until their summed duration reaches the target. With the
    module-level ``loop_vid_from_endframe`` flag set, each appended clip is the
    reverse of the previous one; otherwise the same clip is repeated. The clips
    are joined with ffmpeg's concat demuxer using stream copy and written to
    ``extended_video.mp4``.

    Args:
        video_path: Path of the source video. It is never deleted here.
        target_duration: Minimum duration, in seconds, to reach.

    Returns:
        ``"extended_video.mp4"`` on success. The unchanged ``video_path`` is
        returned when the input is missing or empty, its duration cannot be
        determined, it already meets the target, no clip could be added, a
        clip cannot be probed, or concatenation fails.
    """
    if not os.path.exists(video_path) or os.path.getsize(video_path) == 0:
        print(f"Error: The video file {video_path} is missing or empty.")
        return video_path

    # Check audio existence more robustly
    audio_exists = has_audio(video_path)
    print(f"Audio exists in source: {audio_exists}")

    # Get original duration with verification
    try:
        probe = ffmpeg.probe(video_path, v='error', select_streams='v:0', show_entries='format=duration')
        original_duration = float(probe['format']['duration'])
        print(f"Original duration: {original_duration:.2f}s, Target duration: {target_duration:.2f}s")

        if original_duration <= 0:
            raise ValueError("Invalid video duration detected")
    except Exception as e:
        print(f"Error getting video duration: {str(e)}")
        return video_path

    if original_duration >= target_duration:
        print("Video already meets target duration")
        return video_path

    clips = [video_path]
    # Files created by this function. Only these may be deleted afterwards:
    # in repeat mode ``clips[1:]`` holds the source path itself, and removing
    # it would destroy the file that is returned on failure.
    generated_clips = []
    concat_list_path = "concat_list.txt"
    total_duration = original_duration
    extensions = 0

    try:
        # Create extended clips
        while total_duration < target_duration:
            extensions += 1
            try:
                if loop_vid_from_endframe:
                    reversed_clip = reverse_video(clips[-1], audio_exists)
                    if not os.path.exists(reversed_clip) or os.path.getsize(reversed_clip) == 0:
                        raise Exception("Reversed clip creation failed")
                    clips.append(reversed_clip)
                    if reversed_clip != video_path:
                        generated_clips.append(reversed_clip)
                else:
                    clips.append(clips[-1])

                total_duration += original_duration
            except Exception as e:
                print(f"Failed during clip extension: {str(e)}")
                break

        # Verify we actually extended the video
        if len(clips) <= 1:
            print("No extension performed, returning original")
            return video_path

        # Check all clips before concatenation
        print("\nClip properties before concatenation:")
        for clip in clips:
            try:
                probe = ffmpeg.probe(clip)
                for stream in probe['streams']:
                    if stream['codec_type'] == 'video':
                        print(f"  Video: {stream['codec_name']} {stream['width']}x{stream['height']}")
                    elif stream['codec_type'] == 'audio':
                        print(f"  Audio: {stream['codec_name']}")
            except Exception as e:
                print(f"Error checking clip {clip}: {str(e)}")
                return video_path

        # Concatenation using demuxer method (most reliable)
        extended_video_path = "extended_video.mp4"

        try:
            # Create concat list file
            with open(concat_list_path, 'w') as f:
                for clip in clips:
                    f.write(f"file '{os.path.abspath(clip)}'\n")

            cmd = [
                'ffmpeg', '-y',
                '-f', 'concat',
                '-safe', '0',
                '-i', concat_list_path,
                '-c', 'copy',  # Stream copy (no re-encoding)
                extended_video_path,
            ]

            # Run command with error capture
            subprocess.run(
                cmd,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            print("Concatenation successful!")

        except subprocess.CalledProcessError as e:
            print(f"Concatenation failed with error:\n{e.stderr}")
            return video_path
        except Exception as e:
            print(f"Unexpected error during concatenation: {str(e)}")
            return video_path

        # Verify output
        if not os.path.exists(extended_video_path) or os.path.getsize(extended_video_path) == 0:
            print("Error: Final extended video not created properly")
            return video_path

        final_duration = get_video_duration(extended_video_path)
        print(f"Final extended duration: {final_duration:.2f}s")

        return extended_video_path
    finally:
        # Cleanup temporary files on every exit path.
        if os.path.exists(concat_list_path):
            os.remove(concat_list_path)

        for clip in dict.fromkeys(generated_clips):
            if os.path.exists(clip):
                try:
                    os.remove(clip)
                except Exception as e:
                    print(f"Warning: Could not remove {clip}: {str(e)}")


def reverse_video(video_path, audio_exists):
    """Write a time-reversed copy of the video and return its path.

    The copy is saved as ``reversed_<basename>``; audio is reversed as well
    when ``audio_exists`` is true. On an ``ffmpeg.Error`` the captured stderr
    is printed and the exception is re-raised.
    """
    reversed_path = "reversed_" + os.path.basename(video_path)
    if audio_exists:
        filters = {'vf': 'reverse', 'af': 'areverse'}
    else:
        filters = {'vf': 'reverse'}

    try:
        job = ffmpeg.input(video_path).output(reversed_path, **filters)
        job.run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as e:
        print(f"Reverse failed: {e.stderr.decode()}")
        raise
    return reversed_path

def has_audio(video_path):
    """Tell whether probing the file lists any audio stream.

    Returns False when the probe raises ``ffmpeg.Error``.
    """
    try:
        stream_count = len(ffmpeg.probe(video_path, v='error', select_streams='a')['streams'])
    except ffmpeg.Error:
        return False
    return stream_count > 0

def get_video_duration(video_path):
    """Return the probed container duration in seconds, or 0 on any failure.

    Every exception is caught; its message is printed before 0 is returned.
    """
    try:
        info = ffmpeg.probe(video_path, v='error', select_streams='v:0', show_entries='format=duration')
        seconds = float(info['format']['duration'])
    except Exception as e:
        print(f"Duration check failed: {str(e)}")
        return 0
    return seconds
# End of new functions




# Input widgets for the video-to-video cell. Their values are read by
# on_run_button_click when the button is pressed.
video_upload = widgets.FileUpload(accept=".mp4", multiple=False, description="Upload Video")
audio_upload = widgets.FileUpload(accept=".wav,.mp3,.aac,.flac", multiple=False, description="Upload Audio")
seed_input = widgets.IntText(value=1247, description="Seed:")
num_steps_input = widgets.IntSlider(value=20, min=1, max=100, step=1, description="Steps:")
guidance_scale_input = widgets.FloatSlider(value=1.0, min=0.1, max=10.0, step=0.1, description="Guidance Scale:")
# Factor applied to the source video's width/height when the result is displayed.
video_scale_input = widgets.FloatSlider(value=0.5, min=0.1, max=1.0, step=0.1, description="Video Scale:")
# Frame rate the generated video is converted to before it is displayed.
output_fps_input = widgets.IntSlider(value=25, min=6, max=60, step=1, description="Output FPS:")
# Frame size of the uploaded video; on_run_button_click fills these in
# (0 means "not determined yet").
width, height = 0, 0

# Trigger button and the output area the click handler prints into.
run_button = widgets.Button(description="Run Inference")
output_display = widgets.Output()

def on_run_button_click(change):
    """Button callback: lip-sync the uploaded video to the uploaded audio.

    Saves both uploads, converts the video to 25 fps, pads the audio to a
    multiple of 16 frames, extends or trims the video to the audio's duration,
    runs ``perform_inference``, converts the result to the selected output
    frame rate and displays it. All output is printed into ``output_display``.

    The module-level ``width`` and ``height`` are updated with the frame size
    of the uploaded video on every run.

    Args:
        change: Argument supplied by the button's ``on_click`` hook; unused.
    """
    global width, height

    with output_display:
        output_display.clear_output()

        print("Checking Video...")
        if not video_upload.value or not audio_upload.value:
            print("Please upload both video and audio files.")
            return

        # Every intermediate file is recorded here so the ``finally`` clause
        # can remove it wherever processing stops. ``output_path`` is tracked
        # separately so the final result is never deleted.
        temp_files = []
        output_path = None
        try:
            video_file_info = next(iter(video_upload.value.values()))
            video_path = "uploaded_video.mp4"
            with open(video_path, "wb") as f:
                f.write(video_file_info['content'])
            temp_files.append(video_path)

            # Read the frame size on every run: a value cached from an earlier
            # upload would scale the display with the wrong dimensions.
            print("Setting output video's width & height.")
            import cv2
            width, height = 0, 0
            cap = cv2.VideoCapture(video_path)
            if cap.isOpened():
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            else:
                print("Error: Unable to open video file.")
            cap.release()

            audio_file_info = next(iter(audio_upload.value.values()))
            audio_path = "uploaded_audio.mp3"
            with open(audio_path, "wb") as f:
                f.write(audio_file_info['content'])
            temp_files.append(audio_path)

            # convert_video_fps reports the problem itself and returns None
            # when its input is missing or empty.
            video_path = convert_video_fps(video_path, 25)
            if video_path is None:
                return
            temp_files.append(video_path)

            audio_path, _, audio_duration = pad_audio_to_multiple_of_16(audio_path, target_fps=25)
            temp_files.append(audio_path)

            video_duration = get_video_duration(video_path)

            if audio_duration > video_duration:
                video_path = extend_video(video_path, audio_duration)
                temp_files.append(video_path)
                video_duration = get_video_duration(video_path)
                # Extension works in whole clips, so it may overshoot.
                if video_duration > audio_duration:
                    video_path = trim_video(video_path, audio_duration)
                    temp_files.append(video_path)

            elif video_duration > audio_duration:
                video_path = trim_video(video_path, audio_duration)
                temp_files.append(video_path)

            print("Running inference...")
            output_path = "output_video.mp4"
            perform_inference(video_path, audio_path, seed_input.value, num_steps_input.value, guidance_scale_input.value, output_path)

            output_path = convert_video_fps(output_path, output_fps_input.value)
            if output_path is None:
                return

            print("Inference complete. Displaying output video:")
            from IPython.display import Video
            if width <= 0:
                display(Video(output_path, embed=True))
            else:
                display(Video(output_path, embed=True, width=int(width * video_scale_input.value), height=int(height * video_scale_input.value)))

        finally:
            torch.cuda.empty_cache()
            # The final video can share its name with the 25 fps intermediate
            # ("converted_25fps.mp4"), so it is explicitly excluded here.
            for path in dict.fromkeys(temp_files):
                if path != output_path and os.path.exists(path):
                    os.remove(path)

# Run on_run_button_click whenever the button is pressed, then show all
# controls stacked vertically with the output area last.
run_button.on_click(on_run_button_click)
widgets_box = widgets.VBox([video_upload, audio_upload, seed_input, num_steps_input, guidance_scale_input, video_scale_input, output_fps_input, run_button, output_display])
display(widgets_box)
