In [1]:
# Hugging Face Hub repo ids, grouped by component type and keyed by a short
# author-based alias. Downstream cells look entries up by alias.

# Motion adapters for AnimateDiff.
adapter_ids = dict(
    guoyww_v2="guoyww/animatediff-motion-adapter-v1-5-2",  # original motion adapter
    guoyww_v3="guoyww/animatediff-motion-adapter-v1-5-3",
    guoyww_sdxl="guoyww/animatediff-motion-adapter-sdxl-beta",
    wangfuyun="wangfuyun/AnimateLCM",
)

# Base text-to-image checkpoints.
model_ids = dict(
    frankjoshua="frankjoshua/toonyou_beta6",
    SG161222="SG161222/Realistic_Vision_V5.1_noVAE",
    emilianJR="emilianJR/epiCRealism",
    stabilityai="stabilityai/stable-diffusion-xl-base-1.0",
)

# ControlNet / SparseCtrl checkpoints.
controlnet_ids = dict(
    lllyasviel="lllyasviel/control_v11f1p_sd15_depth",
    guoyww_scr="guoyww/animatediff-sparsectrl-scribble",
    guoyww_rgb="guoyww/animatediff-sparsectrl-rgb",
)

# Stand-alone VAEs.
vae_ids = dict(
    stabilityai="stabilityai/sd-vae-ft-mse",
)

# Repos that host LoRA weights (the AnimateLCM repo also appears in
# `adapter_ids` under the same alias).
lora_adapter_ids = dict(
    guoyww="guoyww/animatediff-motion-lora-v1-5-3",
    wangfuyun="wangfuyun/AnimateLCM",
)

# Annotator repos used for depth detection.
depth_detector_ids = dict(
    lllyasviel="lllyasviel/Annotators",
)

In [2]:
from pathlib import Path

import torch
from diffusers import AnimateDiffPipeline, LCMScheduler, MotionAdapter, AutoencoderKL
from diffusers.utils import export_to_gif, load_image, load_video, export_to_video

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision only on the GPU: diffusers warns that pipelines loaded in
# float16 cannot run on the CPU, so the CPU fallback uses float32 instead.
dtype = torch.float16 if device == "cuda" else torch.float32

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

In [None]:
# --- Build the pipeline -------------------------------------------------------
# Repo ids are taken from the lookup tables defined in the first cell so that
# swapping a checkpoint only means changing an alias.
motion_adapter = MotionAdapter.from_pretrained(adapter_ids['wangfuyun'], torch_dtype=dtype)
vae = AutoencoderKL.from_pretrained(vae_ids['stabilityai'], torch_dtype=dtype)

model_id = model_ids['frankjoshua']  # alternatives: model_ids['emilianJR'], model_ids['SG161222']
pipe = AnimateDiffPipeline.from_pretrained(model_id, motion_adapter=motion_adapter, vae=vae, torch_dtype=dtype)
# Swap in the LCM scheduler, reusing the pipeline's scheduler config.
# (`torch_dtype` is not a scheduler config field, so it is not passed here.)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config, beta_schedule="linear")

# --- LoRA adapters ------------------------------------------------------------
pipe.load_lora_weights(
    lora_adapter_ids['wangfuyun'],
    weight_name="AnimateLCM_sd15_t2v_lora.safetensors",
    adapter_name="lcm-lora",
)
# More LoRA adapters at https://huggingface.co/guoyww
# NOTE: "motion_lora" is loaded but not activated by the set_adapters call
# below; activate both with
#   pipe.set_adapters(["lcm-lora", "motion_lora"], adapter_weights=[0.8, 1.0])
pipe.load_lora_weights(lora_adapter_ids['guoyww'], adapter_name="motion_lora")
pipe.set_adapters(["lcm-lora"], adapter_weights=[0.8])

# --- FreeNoise / memory settings ----------------------------------------------
# Enable FreeNoise for long prompt generation
pipe.enable_free_noise(context_length=16, context_stride=4)
pipe.to(device)
# Save memory
pipe.enable_free_noise_split_inference()
pipe.unet.enable_forward_chunking()

# --- Prompt schedule ----------------------------------------------------------
# `prompt` can be a single string, or a dictionary mapping the frame index at
# which a scene starts to that scene's description.
frames_per_scene = 100
i = frames_per_scene  # short alias kept in case other cells still reference `i`
# NOTE: CLIP only encodes the first 77 tokens of each prompt. The run log shows
# the first scene (111 tokens) being truncated, so everything from
# "the neon store sign flickers" onward is ignored by the text encoder.
prompt = {
    0: """
    Cartoon style portrait of a 34-year-old white man with a scruffy brown beard, messy hair, and tired eyes, walking out of 'PickWick Mini Mart' at night. He’s wearing a wrinkled blue uniform polo with a worn-out name tag, slouched shoulders, and carrying an energy drink can, looking exhausted and fed up. The neon store sign flickers, casting an eerie glow onto the cracked pavement, with a lonely parking lot and distant traffic in the background, cinematic night-time atmosphere.""",
    frames_per_scene * 1: """
    Inside a messy apartment with fast food wrappers, empty energy drink cans, and BMX magazines scattered on a stained couch, the same scruffy man sits slouched, watching an X-Games documentary on an old TV, eyes wide with excitement, imagining himself as a pro BMX rider, dim lighting from a single lamp casting long shadows.
    """,
    frames_per_scene * 2: """
    Above his head, a vivid daydream of himself as 'Benny the Bullet,' wearing a neon-green Monster Energy-branded BMX suit, soaring through the air in a dramatic mid-air stunt with flashing cameras and cheering crowds, bright, exaggerated cartoon-style visuals with bold motion lines.
    """,
    frames_per_scene * 3: """
    A Reddit thread glowing on his laptop screen, with a bold post reading ‘Money is just energy—GO GET IT,’ the man's eyes lighting up with a sudden burst of motivation, exaggerated cartoon-like glow around his face, a mix of ambition and delusion.
    """,
    frames_per_scene * 4: """
    Benny standing proudly outside a large bank, wearing sunglasses and a scarf, with a confident smirk, imagining himself casually walking inside to claim his fortune, vibrant and humorous composition with a slightly exaggerated comic book effect.
    """,
}
negative_prompt = "bad quality, worst quality, jpeg artifacts"

# --- Output location ----------------------------------------------------------
# Create the output directory *before* the long inference run so the result is
# not lost to a missing-directory error at save time.
output_path = Path("outputs") / "output1.mp4"
output_path.parent.mkdir(parents=True, exist_ok=True)

# --- Run inference ------------------------------------------------------------
output = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    num_frames=frames_per_scene * len(prompt),  # one block of frames per scene (5 scenes -> 500)
    guidance_scale=2.5,
    num_inference_steps=10,
    generator=torch.Generator('cpu').manual_seed(42),  # fixed seed for reproducibility
)

# --- Save video ---------------------------------------------------------------
frames = output.frames[0]
export_to_video(frames, str(output_path), fps=16)

config.json:   0%|          | 0.00/376 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/1.82G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

model_index.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/374 [00:00<?, ?B/s]

safety_checker/config.json:   0%|          | 0.00/697 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/492M [00:00<?, ?B/s]

(…)ature_extractor/preprocessor_config.json:   0%|          | 0.00/520 [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

diffusion_pytorch_model.bin:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/737 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

An error occurred while trying to fetch /home/users/rhassanzadeh1/.cache/huggingface/hub/models--frankjoshua--toonyou_beta6/snapshots/9bb5e1c5be60ab664ec80bff429126e0844ac9fe/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /home/users/rhassanzadeh1/.cache/huggingface/hub/models--frankjoshua--toonyou_beta6/snapshots/9bb5e1c5be60ab664ec80bff429126e0844ac9fe/unet.
Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.


AnimateLCM_sd15_t2v_lora.safetensors:   0%|          | 0.00/135M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (111 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['the neon store sign flickers, casting an eerie glow onto the cracked pavement, with a lonely parking lot and distant traffic in the background, cinematic night - time atmosphere.', '<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|>', '<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoft

  0%|          | 0/10 [00:00<?, ?it/s]