## Load

In [None]:
import os
import torch

from animatediff.pipelines.pipeline_animation_inpaint_v3 import AnimationInpaintPipeline

from diffusers import DDIMScheduler, EulerDiscreteScheduler, PNDMScheduler
from transformers import CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL
from animatediff.models.unet import UNet3DConditionModel

# Root directory of the Stable Diffusion checkpoint. The tokenizer, text
# encoder, VAE and UNet are each read from a subfolder of this directory.
stable_diffusion_model_path = "./models/StableDiffusion/ACertainThing/"

tokenizer = CLIPTokenizer.from_pretrained(stable_diffusion_model_path, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(stable_diffusion_model_path, subfolder="text_encoder").cuda()
vae = AutoencoderKL.from_pretrained(stable_diffusion_model_path, subfolder="vae").cuda()

# The 3D UNet is initialised from the 2D UNet weights in the "unet" subfolder;
# `unet_additional_kwargs` is forwarded to `from_pretrained_2d` and carries the
# motion-module options.
unet = UNet3DConditionModel.from_pretrained_2d(stable_diffusion_model_path, subfolder="unet", unet_additional_kwargs={
    "unet_use_cross_frame_attention": False,
    "unet_use_temporal_attention": False,
    "use_motion_module": True,
    "motion_module_resolutions": [1, 2, 4, 8],
    "motion_module_mid_block": False,
    "motion_module_decoder_only": False,
    "motion_module_type": "Vanilla",
    "motion_module_kwargs": {
        "num_attention_heads": 8,
        "num_transformer_block": 1,
        "attention_block_types": ["Temporal_Self", "Temporal_Self"],
        "temporal_position_encoding": True,
        "temporal_position_encoding_max_len": 24,
        "temporal_attention_dim_div": 1
    }
}).cuda()

motion_module_path = os.path.join("./models/Motion_Module/", "mm_sd_v14.ckpt")
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code.
# Only load motion-module checkpoints obtained from a trusted source.
motion_module_state_dict = torch.load(motion_module_path, map_location="cpu")

# strict=False tolerates keys missing from the checkpoint (presumably it only
# holds the motion-module weights -- confirm), but any key the UNet does not
# know about means the checkpoint does not match this architecture.
missing, unexpected = unet.load_state_dict(motion_module_state_dict, strict=False)
if unexpected:
    # Explicit raise instead of `assert`: asserts are stripped under `python -O`,
    # and this message names the offending keys.
    raise RuntimeError(
        f"Motion module checkpoint {motion_module_path!r} contains "
        f"{len(unexpected)} key(s) not present in the UNet: {unexpected}"
    )

unet.enable_xformers_memory_efficient_attention()
scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="linear")

pipeline = AnimationInpaintPipeline(vae=vae, text_encoder=text_encoder, tokenizer=tokenizer, unet=unet, scheduler=scheduler).to("cuda")

## Prompt

In [None]:
import PIL

# One settings dict per video to generate. The inference loop reads "seed" and
# "keyframes" itself and forwards the remaining entries to the pipeline call.
configs = [
    dict(
        prompt="",
        negative_prompt="",
        num_inference_steps=25,
        guidance_scale=1,
        width=512,
        height=512,
        video_length=10,
        seed=1,
        # Maps an integer key to the path of an image file that is opened at
        # inference time. The keys look like frame indices within
        # `video_length` -- confirm against the pipeline.
        keyframes={
            0: "images/0.jpeg",
            9: "images/15.jpeg",
        },
        add_predicted_noise=False,
        do_reconstruction_guidance=True,
        reconstruction_guidance_scale=1000,
    ),
]

## Inference

In [None]:
import time
from datetime import datetime

from animatediff.utils.util import save_videos_grid

# Imported explicitly: a bare `import PIL` does not load the `PIL.Image`
# submodule, so `PIL.Image` only resolves if another library imported it first.
from PIL import Image

# Render one video per entry in `configs` and store it, together with the
# settings that produced it, under ./samples/<timestamp>/.
for config in configs:
    # Seed torch's global RNG before each run.
    torch.manual_seed(config["seed"])

    # Open the keyframe images up front so they can be closed once the
    # pipeline call has finished, whether it succeeds or raises.
    keyframe_images = {
        index: Image.open(path) for index, path in config["keyframes"].items()
    }
    try:
        sample = pipeline(
            prompt=config["prompt"],
            negative_prompt=config["negative_prompt"],
            num_inference_steps=config["num_inference_steps"],
            guidance_scale=config["guidance_scale"],
            width=config["width"],
            height=config["height"],
            video_length=config["video_length"],
            keyframes=keyframe_images,
            add_predicted_noise=config["add_predicted_noise"],
            do_reconstruction_guidance=config["do_reconstruction_guidance"],
            reconstruction_guidance_scale=config["reconstruction_guidance_scale"],
        ).videos
    finally:
        for image in keyframe_images.values():
            image.close()

    savedir = os.path.join(os.getcwd(), "samples", datetime.now().strftime("Gradio-%Y-%m-%dT%H-%M-%S"))
    savedir_sample = os.path.join(savedir, "sample")
    # Create the nested "sample" directory (and therefore `savedir`) here,
    # rather than relying on save_videos_grid to create it.
    os.makedirs(savedir_sample, exist_ok=True)

    save_sample_path = os.path.join(savedir_sample, f"{int(time.time())}.mp4")
    save_videos_grid(sample, save_sample_path)

    # Save the config next to the video. UTF-8 is set explicitly so prompts
    # with non-ASCII characters do not depend on the platform default encoding.
    with open(os.path.join(savedir, "prompt.txt"), "w", encoding="utf-8") as f:
        f.write(str(config))