In [None]:
!pip install -qU diffusers accelerate transformers huggingface_hub

In [None]:
# Authenticate this notebook session with the Hugging Face Hub.
from huggingface_hub import notebook_login

notebook_login()

# Load community pipelines and components

## Community pipelines

A community pipeline is any `DiffusionPipeline` class that differs from the original paper implementation.

There are two types of community pipelines: those stored on the Hugging Face Hub and those stored in the Diffusers GitHub repository.

Hub pipelines are completely customizable (schedulers, models, pipeline code, etc.), while Diffusers GitHub pipelines are limited to custom pipeline code only.

### Load from a local file

In [None]:
from transformers import CLIPImageProcessor, CLIPModel

# One checkpoint id supplies both the CLIP model and its image processor.
clip_model_id = "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
clip_model, feature_extractor = (
    loader.from_pretrained(clip_model_id)
    for loader in (CLIPModel, CLIPImageProcessor)
)

In [None]:
from diffusers import DiffusionPipeline

# `custom_pipeline` below is a placeholder: point it at the local directory
# that holds the pipeline code before running this cell.
pipeline = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5",
    use_safetensors=True,
    feature_extractor=feature_extractor,
    clip_model=clip_model,
    custom_pipeline="<path_to_pipeline_directory>",  # change path
)

### Load from a specific version

By default, community pipelines are loaded from the latest stable version of Diffusers.

Load from the main branch:

In [None]:
# `custom_revision="main"` selects the community pipeline code from the main branch.
main_branch_kwargs = {
    "custom_pipeline": "clip_guided_stable_diffusion",
    "custom_revision": "main",
    "clip_model": clip_model,
    "feature_extractor": feature_extractor,
    "use_safetensors": True,
}
pipeline = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", **main_branch_kwargs
)

Load from a previous version of Diffusers:

In [None]:
# `custom_revision="v0.25.0"` pins the community pipeline code to that Diffusers release tag.
pinned_release_kwargs = {
    "custom_pipeline": "clip_guided_stable_diffusion",
    "custom_revision": "v0.25.0",
    "clip_model": clip_model,
    "feature_extractor": feature_extractor,
    "use_safetensors": True,
}
pipeline = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", **pinned_release_kwargs
)

### Load with `from_pipe`

Use the `from_pipe()` method to load and reuse pipelines without any additional memory overhead.

For example, we can load a community pipeline that supports long prompts with weighting from a Stable Diffusion pipeline.

In [None]:
from diffusers import StableDiffusionPipeline
import torch

# Load the base checkpoint with float16 weights, then move it to the GPU.
pipe_sd = DiffusionPipeline.from_pretrained("emilianJR/CyberRealistic_V3", torch_dtype=torch.float16)
pipe_sd.to("cuda")

In [None]:
# Long prompt weighting pipeline, created from the already-loaded `pipe_sd`.
pipe_lpw = DiffusionPipeline.from_pipe(pipe_sd, custom_pipeline="lpw_stable_diffusion")
pipe_lpw = pipe_lpw.to("cuda")

In [None]:
prompt = "cat, hiding in the leaves, ((rain)), zazie rainyday, beautiful eyes, macro shot, colorful details, natural lighting, amazing composition, subsurface scattering, amazing textures, filmic, soft light, ultra-detailed eyes, intricate details, detailed texture, light source contrast, dramatic shadows, cinematic light, depth of field, film grain, noise, dark background, hyperrealistic dslr film still, dim volumetric cinematic lighting"
neg_prompt = "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers:1.4), (deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"

# CPU generator with a fixed seed so the sampling noise is repeatable.
generator = torch.Generator(device="cpu").manual_seed(111)

# Prompt and negative prompt are passed positionally, in that order.
lpw_result = pipe_lpw(
    prompt,
    neg_prompt,
    height=512,
    width=512,
    num_inference_steps=50,
    max_embeddings_multiples=3,
    generator=generator,
)
out_lpw = lpw_result.images[0]
out_lpw

## Example community pipelines

We can find all community pipelines in the `diffusers/examples/community` folder of the official GitHub repository.

### Marigold

`Marigold` is a depth estimation diffusion pipeline that uses the rich existing and inherent visual knowledge in diffusion models. It takes an input image and denoises it into a depth map.

In [None]:
import torch
from PIL import Image
from diffusers import DiffusionPipeline
from diffusers.utils import load_image

# Request the fp16 weight variant in float16, then move the pipeline to the GPU.
pipeline = DiffusionPipeline.from_pretrained(
    "prs-eth/marigold-lcm-v1-0",
    variant="fp16",
    torch_dtype=torch.float16,
    custom_pipeline="marigold_depth_estimation",
)
pipeline.to("cuda")

In [None]:
image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/community-marigold.png")

# The Marigold community pipeline names its working-resolution argument
# `processing_res`; passing `processing_size` raises a TypeError.
output = pipeline(
    image,
    denoising_steps=4,
    ensemble_size=5,
    processing_res=768,
    match_input_res=True,
    batch_size=0,
    seed=111,
    color_map='Spectral',
    show_progress_bar=True,
)

# End the cell on the colorized depth map so the notebook renders it.
depth_colored: Image.Image = output.depth_colored
depth_colored

### HD-Painter

`HD-Painter` is a high-resolution inpainting pipeline. It introduces a *Prompt-Aware Introverted Attention (PAIntA)* layer to better align a prompt with the area to be painted, and *Reweighting Attention Score Guidance (RASG)* to keep the latents more prompt-aligned and within their trained domain to generate realistic images.

In [None]:
import torch
from diffusers import DiffusionPipeline, DDIMScheduler
from diffusers.utils import load_image

# NOTE(review): confirm this repo id resolves on the Hub; the inpainting
# checkpoint may instead be published as
# 'stable-diffusion-v1-5/stable-diffusion-inpainting'.
pipeline = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5-inpainting",
    custom_pipeline="hd_painter",
)

# Replace the loaded scheduler with a DDIM scheduler built from the same config.
loaded_scheduler_config = pipeline.scheduler.config
pipeline.scheduler = DDIMScheduler.from_config(loaded_scheduler_config)

In [None]:
init_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/hd-painter.jpg")
mask_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/hd-painter-mask.png")
prompt = "football"

# Prompt, source image and mask are passed positionally, in that order;
# both HD-Painter switches (PAIntA and RASG) are turned on.
inpaint_result = pipeline(
    prompt,
    init_image,
    mask_image,
    use_painta=True,
    use_rasg=True,
    generator=torch.Generator(device="cpu").manual_seed(111),
)
image = inpaint_result.images[0]
image

## Community components

Community components allow us to build pipelines that may have customized components that are not a part of Diffusers.

In this section, we will build a customized pipeline from the components of the `showlab/show-1-base` checkpoint.

1. Import and load the text encoder from Transformers:

In [None]:
from transformers import T5Tokenizer, T5EncoderModel

pipe_id = "showlab/show-1-base"

# Tokenizer and text encoder are loaded from their own subfolders of `pipe_id`.
tokenizer, text_encoder = (
    loader.from_pretrained(pipe_id, subfolder=folder)
    for loader, folder in (
        (T5Tokenizer, "tokenizer"),
        (T5EncoderModel, "text_encoder"),
    )
)

2. Load a scheduler:

In [None]:
from diffusers import DPMSolverMultistepScheduler

# Load the scheduler from the `scheduler` subfolder of `pipe_id`.
scheduler = DPMSolverMultistepScheduler.from_pretrained(
    pipe_id,
    subfolder="scheduler",
)

3. Load an image processor:

In [None]:
from transformers import CLIPImageProcessor

# Load the image processor from the `feature_extractor` subfolder of `pipe_id`.
feature_extractor = CLIPImageProcessor.from_pretrained(
    pipe_id,
    subfolder="feature_extractor",
)

4. Load a custom UNet. The class is implemented in [showone_unet_3d_condition.py](https://huggingface.co/sayakpaul/show-1-base-with-code/tree/main). Make sure the file is importable (for example, place it in the working directory) before running the cell below.

In [None]:
# This import needs `showone_unet_3d_condition.py` to be on the Python path.
from showone_unet_3d_condition import ShowOneUNet3DConditionModel

unet = ShowOneUNet3DConditionModel.from_pretrained(
    pipe_id,
    subfolder="unet",
)

5. Load the custom pipeline code. The class is implemented in [pipeline_t2v_base_pixel.py](https://huggingface.co/sayakpaul/show-1-base-with-code/blob/main/pipeline_t2v_base_pixel.py). Make sure the file is importable (for example, place it in the working directory) before running the cell below.

In [None]:
from pipeline_t2v_base_pixel import Text2VideoIFPipeline
import torch

# Assemble the pipeline from the components loaded in the previous steps.
show1_components = dict(
    unet=unet,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    scheduler=scheduler,
    feature_extractor=feature_extractor,
)
pipeline = Text2VideoIFPipeline(**show1_components)

# Move to the GPU, then record float16 on the pipeline's `torch_dtype` attribute.
pipeline.to("cuda")
pipeline.torch_dtype = torch.float16

In [None]:
prompt = "a cat sitting on a chair"

# Encode the prompt once; the call returns the prompt and negative embeddings.
prompt_embeds, negative_embeds = pipeline.encode_prompt(prompt)

# Keyframes generation (8x64x40, 2fps)
keyframe_output = pipeline(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_embeds,
    num_frames=8,
    width=64,
    height=40,
    guidance_scale=9.0,
    num_inference_steps=2,
    output_type="pt",
)
video_frames = keyframe_output.frames