In [1]:
from diffusers import (
    UNet2DConditionModel,
    AutoencoderKL,
    DDIMScheduler,
    StableDiffusionPipeline,
)
from transformers import CLIPTextModel, CLIPTextConfig, CLIPTokenizer

# Small conditional UNet assembled directly from hyperparameters; no checkpoint
# is loaded here (weights are presumably freshly initialised — TODO confirm).
unet = UNet2DConditionModel(
    # Input/output tensors: 4 channels each, matching latent_channels=4 of the
    # VAE configured further down in this cell.
    sample_size=32,
    in_channels=4,
    out_channels=4,
    # Two levels; the second down block and the first up block use cross-attention.
    down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
    up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
    block_out_channels=(32, 64),
    layers_per_block=2,
    # Same value as hidden_size of the text encoder config in this cell.
    cross_attention_dim=32,
)

# DDIM scheduler using the "scaled_linear" beta schedule between 0.00085 and 0.012,
# with sample clipping and the alpha-to-one option both switched off.
scheduler = DDIMScheduler(
    beta_schedule="scaled_linear",
    beta_start=0.00085,
    beta_end=0.012,
    set_alpha_to_one=False,
    clip_sample=False,
)

# Tiny two-level KL autoencoder: 3-channel images in/out, 4 latent channels.
# NOTE(review): a later cell rebinds `vae` to a pretrained checkpoint, so this
# instance is replaced if that cell runs before the pipeline is assembled —
# confirm which VAE is meant to be uploaded.
vae = AutoencoderKL(
    in_channels=3,
    out_channels=3,
    latent_channels=4,
    block_out_channels=[32, 64],
    down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
    up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
)

# Compact CLIP text encoder configuration; hidden_size=32 equals the UNet's
# cross_attention_dim set earlier in this cell.
text_encoder_config = CLIPTextConfig(
    # Model dimensions.
    vocab_size=1000,
    hidden_size=32,
    intermediate_size=37,
    num_hidden_layers=5,
    num_attention_heads=4,
    layer_norm_eps=1e-05,
    # Special-token ids.
    bos_token_id=0,
    pad_token_id=1,
    eos_token_id=2,
)
# Instantiate the encoder from the config above (no pretrained weights are loaded).
text_encoder = CLIPTextModel(text_encoder_config)

# The tokenizer, by contrast, is loaded from a published repository.
tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

  from .autonotebook import tqdm as notebook_tqdm
  deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message)


In [3]:
# Load the VAE stored under the "vae" subfolder of the Stable Diffusion v1-4
# repository and print its configuration.
# NOTE(review): this rebinds `vae`, replacing the small AutoencoderKL built in
# the first cell; any later cell that reads `vae` gets this pretrained model.
# Confirm that is intended.
vae = AutoencoderKL.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    subfolder="vae",
    use_safetensors=True,
)
print(vae.config)

FrozenDict([('in_channels', 3), ('out_channels', 3), ('down_block_types', ['DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D']), ('up_block_types', ['UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D']), ('block_out_channels', [128, 256, 512, 512]), ('layers_per_block', 2), ('act_fn', 'silu'), ('latent_channels', 4), ('norm_num_groups', 32), ('sample_size', 512), ('scaling_factor', 0.18215), ('shift_factor', None), ('latents_mean', None), ('latents_std', None), ('force_upcast', True), ('use_quant_conv', True), ('use_post_quant_conv', True), ('_use_default_values', ['norm_num_groups', 'force_upcast', 'use_quant_conv', 'latents_std', 'use_post_quant_conv', 'latents_mean', 'shift_factor']), ('_class_name', 'AutoencoderKL'), ('_diffusers_version', '0.2.2'), ('_name_or_path', 'CompVis/stable-diffusion-v1-4')])


In [5]:
# Collect the sub-models under the keyword names the pipeline constructor takes.
# NOTE(review): `vae` is whichever object was bound last (the first cell builds
# a small one, a later cell loads a pretrained one) — confirm the intended one.
components = dict(
    unet=unet,
    scheduler=scheduler,
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    # No safety checker / feature extractor is attached to this pipeline.
    safety_checker=None,
    feature_extractor=None,
)

pipeline = StableDiffusionPipeline(**components)
# Upload to the Hub repository "ProteinDiffusion"; as the last expression, the
# returned commit info becomes the cell's displayed output.
pipeline.push_to_hub("ProteinDiffusion")

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
model.safetensors:   0%|          | 0.00/283k [00:00<?, ?B/s]
model.safetensors:   6%|▌         | 16.4k/283k [00:00<00:01, 140kB/s]
model.safetensors: 100%|██████████| 283k/283k [00:00<00:00, 521kB/s]
diffusion_pytorch_model.safetensors: 100%|██████████| 2.65M/2.65M [00:01<00:00, 2.24MB/s]
diffusion_pytorch_mo

CommitInfo(commit_url='https://huggingface.co/kkj15dk/ProteinDiffusion/commit/9850642ffd8c0c89ac326f790ad2bd420a6e181a', commit_message='Upload StableDiffusionPipeline', commit_description='', oid='9850642ffd8c0c89ac326f790ad2bd420a6e181a', pr_url=None, pr_revision=None, pr_num=None)