# SVD: Stable Video Diffusion

[![Model on HF](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue)](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid)
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nguu/maze/blob/main/Stable_Video_Diffusion.ipynb)
[![GitHub Repository](https://img.shields.io/github/stars/Stability-AI/generative-models?style=social)](https://github.com/Stability-AI/generative-models)


## Environment


In [None]:
import os, shutil

# Colab form toggle: set to True to mount Google Drive into this runtime.
USE_DRIVE = False #@param {type:'boolean'}
if USE_DRIVE:
    # Imported only when requested, so the cell does not touch google.colab otherwise.
    from google.colab import drive
    drive.mount('/content/drive')

# Notebook magic: quietly (-q) install or upgrade (-U) the listed packages.
%pip install -q -U diffusers transformers accelerate

import torch
from diffusers import (
    StableVideoDiffusionPipeline,
    DiffusionPipeline,
    DPMSolverMultistepScheduler,
    DPMSolverSinglestepScheduler,
    KDPM2DiscreteScheduler,
    EulerDiscreteScheduler,
    EulerAncestralDiscreteScheduler,
    HeunDiscreteScheduler,
)
from diffusers.utils import export_to_video

def get_scheduler(name: str, pipe: DiffusionPipeline):
    """Create the scheduler that corresponds to the sampler label *name*.

    The scheduler is built with ``from_config`` from the configuration of the
    scheduler currently attached to ``pipe``; some labels add overrides
    (Karras sigmas and/or the SDE variant of DPM-Solver++).

    Args:
        name: Sampler label, e.g. ``"Euler"`` or ``"DPM++ 2M SDE Karras"``.
        pipe: Pipeline whose ``scheduler.config`` seeds the new scheduler.

    Returns:
        A new scheduler instance. Labels that are not listed below fall back
        to ``EulerAncestralDiscreteScheduler`` without overrides.
    """
    karras = {"use_karras_sigmas": True}
    sde = {"algorithm_type": "sde-dpmsolver++"}

    # Sampler label -> (scheduler class, keyword overrides for from_config).
    recipes = {
        "DPM++ 2M": (DPMSolverMultistepScheduler, {}),
        "DPM++ 2M Karras": (DPMSolverMultistepScheduler, karras),
        "DPM++ 2M SDE": (DPMSolverMultistepScheduler, sde),
        "DPM++ 2M SDE Karras": (DPMSolverMultistepScheduler, {**karras, **sde}),
        "DPM++ SDE": (DPMSolverSinglestepScheduler, {}),
        "DPM++ SDE Karras": (DPMSolverSinglestepScheduler, karras),
        "DPM2": (KDPM2DiscreteScheduler, {}),
        "DPM2 Karras": (KDPM2DiscreteScheduler, karras),
        "Euler": (EulerDiscreteScheduler, {}),
        "Euler a": (EulerAncestralDiscreteScheduler, {}),
        "Heun": (HeunDiscreteScheduler, {}),
    }

    fallback = (EulerAncestralDiscreteScheduler, {})
    scheduler_cls, overrides = recipes.get(name, fallback)
    return scheduler_cls.from_config(pipe.scheduler.config, **overrides)

# Load the Stable Video Diffusion image-to-video ("img2vid-xt") checkpoint,
# requesting the fp16 weight variant and float16 tensors.
pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt",
    torch_dtype=torch.float16,
    variant="fp16",
    # NOTE(review): presumably prevents a stored Hugging Face token from being sent — confirm.
    token=False,
)
# Turn on diffusers' model CPU offloading for this pipeline.
# NOTE(review): presumably used to lower GPU memory use on Colab — confirm.
pipe.enable_model_cpu_offload()

## Generate Video

In [None]:
import random
from datetime import datetime
from PIL import Image

# Directory that receives the generated files; created up front if missing.
OUTPUT_DIR = '/content/output' #@param {type:'string'}
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Path of the conditioning image (opened with PIL further down in this cell).
IMAGE = '/content/20231224-213613-001.png' #@param {type:'string'}
# Sampler label handed to get_scheduler() to pick the pipeline scheduler.
SAMPLER = 'Euler' #@param ['Euler', 'Euler a', 'DPM++ 2M', 'DPM++ 2M Karras', 'DPM++ 2M SDE', 'DPM++ 2M SDE Karras', 'DPM++ SDE', 'DPM++ SDE Karras', 'DPM2', 'DPM2 Karras', 'Heun']
# Passed to the pipeline as min_guidance_scale / max_guidance_scale.
MIN_CFG = 1.0 # @param {type:'slider', min:1, max:5, step:0.1}
MAX_CFG = 2 # @param {type:'slider', min:1, max:8, step:0.1}
# Passed to the pipeline as motion_bucket_id.
MOTION_BUCKET_ID = 2 #@param {type:'integer'}
# Passed to the pipeline as noise_aug_strength.
NOISE_AUG_STRENGTH = 0.03 # @param {type:"slider", min:0, max:1, step:0.01}
# Used both as the pipeline's fps argument and as the frame rate of the exported video.
FPS = 12 #@param {type:'integer'}
# Passed to the pipeline as num_frames.
NUM_FRAMES = 25 #@param {type:'integer'}
# Passed to the pipeline as num_inference_steps.
STEPS = 21 #@param {type:'integer'}
# Passed to the pipeline as decode_chunk_size.
CHUNK_SIZE = 6 #@param {type:'integer'}
# Random seed; -1 means "pick one for me".
SEED = -1 #@param {type:'integer'}

# Replace the -1 sentinel with a random 9-digit seed so the value can be recorded in the filename.
if SEED == -1:
  SEED = random.randint(100000000, 999999999)

# Base name of the output files; an empty value defaults to the current timestamp.
OUTPUT_NAME = '' #@param {type:'string'}
if OUTPUT_NAME == '':
  OUTPUT_NAME = datetime.now().strftime('%Y%m%d-%H%M%S')

# Load the conditioning image and normalise it to RGB.
img = Image.open(IMAGE).convert('RGB')

# StableVideoDiffusionPipeline rejects a height or width that is not divisible
# by 8, so snap each side down to the nearest multiple of 8 (never below 8)
# and resize the image when its original size does not already qualify.
w, h = (max(8, side - side % 8) for side in img.size)
if (w, h) != img.size:
    img = img.resize((w, h))

# Seed torch's global RNG; manual_seed returns the generator passed to the pipeline.
generator = torch.manual_seed(SEED)
# Swap in the scheduler that matches the sampler chosen in the form.
pipe.scheduler = get_scheduler(SAMPLER, pipe)
# Run image-to-video generation and keep the frames of the first result.
frames = pipe(
    img,
    fps=FPS,
    decode_chunk_size=CHUNK_SIZE,
    motion_bucket_id=MOTION_BUCKET_ID,
    noise_aug_strength=NOISE_AUG_STRENGTH,
    num_frames=NUM_FRAMES,
    num_inference_steps=STEPS,
    min_guidance_scale=MIN_CFG,
    max_guidance_scale=MAX_CFG,
    height=h,
    width=w,
    generator=generator,
).frames[0]

# Tags recording the generation settings; they become part of every output filename.
meta_tags = (
    f'(MOTION_{MOTION_BUCKET_ID})',
    f'(CFG_{MIN_CFG}-{MAX_CFG})',
    f'(NOISE_{NOISE_AUG_STRENGTH})',
    f'(SEED_{SEED})',
    f'({w}x{h})',
)
meta_str = ' '.join(meta_tags)

# Write the generated frames to an .mp4 in the output directory at the chosen frame rate.
output_video_path = f'{OUTPUT_DIR}/{OUTPUT_NAME} {meta_str}.mp4'
export_to_video(frames, output_video_path, fps=FPS)

# Optionally save the final frame as a .jpg next to the video, under the same base name.
EXPORT_LAST_FRAME = False #@param {type:'boolean'}
if EXPORT_LAST_FRAME:
    last_frame = frames[-1]
    output_frame_path = f'{OUTPUT_DIR}/{OUTPUT_NAME} {meta_str}.jpg'
    last_frame.save(output_frame_path)


In [None]:
#@title Clean output directory
# Empty the output directory by deleting it and creating it again.
# WARNING: this permanently removes everything under OUTPUT_DIR.
# The isdir guard keeps the cell from raising FileNotFoundError when the
# directory does not exist yet (e.g. OUTPUT_DIR was changed since the last run).
if os.path.isdir(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR, exist_ok=True)
