<a href="https://colab.research.google.com/github/prakhar760/GenerativeAI/blob/main/Diffusers/Diffusers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the pinned diffusers release (0.11.1) that the rest of this notebook imports from.
!pip install diffusers==0.11.1

In [None]:
# Use the google/ddpm-celebahq-256 model, built in collaboration by Google and U.C. Berkeley. It follows the Denoising Diffusion Probabilistic Models (DDPM) algorithm and was trained on a dataset of celebrity images. Importing DDPMPipeline lets us run inference with a couple of lines of code.
from diffusers import DDPMPipeline

In [None]:
# The from_pretrained() method downloads the model and its configuration from the Hugging Face Hub.
import torch

image_pipe = DDPMPipeline.from_pretrained("google/ddpm-celebahq-256")

# Prefer the GPU, but fall back to the CPU so the notebook still runs (more slowly)
# on machines without CUDA instead of crashing here.
device = "cuda" if torch.cuda.is_available() else "cpu"
image_pipe.to(device)

In [None]:
# Generating an image requires no input: calling the pipeline draws a random initial
# noise sample and then iterates the diffusion process. The resulting images are
# read from the `.images` attribute of the returned output.
pipeline_output = image_pipe()
images = pipeline_output.images

In [None]:
# Show the first generated image: as the last expression of the cell, it is rendered in the cell output.
images[0]

In [None]:
# Evaluate the pipeline object on its own to view its configuration in the cell output.
image_pipe

In [None]:
# Load a simple unconditional image generation model of type UNet2DModel, which was
# released with the DDPM paper. Here we use another checkpoint, trained on church
# images: google/ddpm-church-256.
from diffusers import UNet2DModel

# `repo_id` and `model` are used by the following cells (model config, forward pass,
# scheduler loading), so they must be defined here.
repo_id = "google/ddpm-church-256"
model = UNet2DModel.from_pretrained(repo_id)

In [None]:
# Inspect the model's configuration; fields such as in_channels and sample_size are read from it further below.
model.config

In [None]:
# Build a second UNet2DModel by passing the existing model's config as unpacked keyword arguments.
# NOTE(review): the constructor is called directly rather than from_pretrained(), so the new
# model presumably has freshly initialized (random) weights, as its name suggests — confirm.
model_random = UNet2DModel(**model.config)

In [None]:
# Save the newly created model under the path "my_model".
model_random.save_pretrained("my_model")

In [None]:
# Draw a random Gaussian sample shaped like an image batch:
# (batch_size, in_channels, sample_size, sample_size).
#   - batch axis: the model can receive several random noises at once
#   - channel axis: each sample consists of multiple channels (such as red-green-blue)
#   - sample_size: the height and the width of the image
import torch

# Fix the seed so the drawn noise is reproducible.
torch.manual_seed(0)

sample_shape = (
    1,
    model.config.in_channels,
    model.config.sample_size,
    model.config.sample_size,
)
noisy_sample = torch.randn(sample_shape)
noisy_sample.shape

In [None]:
# Run the model once on the noisy sample at timestep 2. Gradient tracking is
# disabled because this is inference only.
with torch.no_grad():
    model_output = model(sample=noisy_sample, timestep=2)
noisy_residual = model_output.sample

In [None]:
# The predicted noisy_residual is expected to have exactly the same shape as the input; it is
# used below to compute a slightly less noisy image. Display its shape to confirm it matches
# noisy_sample.shape.
noisy_residual.shape

In [None]:
# Load the DDPMScheduler configuration that belongs to the checkpoint.
from diffusers import DDPMScheduler

# from_pretrained() is the supported way to load a scheduler from a repo id or path;
# passing a repo id to from_config() is deprecated in the pinned diffusers release.
scheduler = DDPMScheduler.from_pretrained(repo_id)

In [None]:
# Inspect the scheduler's configuration in the cell output.
scheduler.config

In [None]:
# Like the models, schedulers can be saved and loaded with save_pretrained() and
# from_pretrained(). (Passing a path to from_config() is deprecated; from_config()
# is meant to receive an already loaded config dict.)
scheduler.save_pretrained("my_scheduler")
new_scheduler = DDPMScheduler.from_pretrained("my_scheduler")

In [None]:
# Perform a single scheduler step at timestep 2: combine the model's predicted
# residual with the current noisy sample and read the result from `prev_sample`.
step_output = scheduler.step(
    model_output=noisy_residual,
    timestep=2,
    sample=noisy_sample,
)
less_noisy_sample = step_output.prev_sample
less_noisy_sample.shape

In [None]:
# Define a display function that post-processes the denoised image, converts it to a PIL.Image, and displays it.
import PIL.Image
import numpy as np

def display_sample(sample, i):
    """Post-process a batch of samples and show its first image in the notebook.

    Args:
        sample: Tensor indexed as (batch, channels, height, width). Values are
            rescaled as if they lie in [-1, 1]; anything outside that range is
            clamped. Only the first batch element is displayed.
            NOTE(review): presumably 3-channel RGB data — confirm against the
            model's in_channels.
        i: Step number shown in the caption above the image.
    """
    # Move to the CPU and reorder to channels-last (batch, height, width, channels).
    image_processed = sample.detach().cpu().permute(0, 2, 3, 1)
    # Rescale [-1, 1] -> [0, 255], then clamp: intermediate samples can fall
    # outside the nominal range, and an unclamped float -> uint8 cast would
    # wrap those values around instead of saturating them.
    image_processed = ((image_processed + 1.0) * 127.5).clamp(0, 255)
    image_processed = image_processed.numpy().astype(np.uint8)

    image_pil = PIL.Image.fromarray(image_processed[0])
    display(f"Image at step {i}")
    display(image_pil)

In [None]:
# Move the input and the model to the GPU to speed up the denoising process a bit.
# Fall back to the CPU when CUDA is unavailable so the notebook still runs.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
noisy_sample = noisy_sample.to(device)

In [None]:
# Denoising loop: walk through the scheduler's timesteps and show the
# (less and less) noisy sample every 50 steps to visualize the process.
import tqdm

sample = noisy_sample

progress = tqdm.tqdm(scheduler.timesteps)
for step, t in enumerate(progress, start=1):
    # 1. predict the noise residual (inference only, so no gradient tracking)
    with torch.no_grad():
        residual = model(sample=sample, timestep=t).sample

    # 2. compute the less noisy image and set x_t -> x_t-1
    sample = scheduler.step(
        model_output=residual, timestep=t, sample=sample
    ).prev_sample

    # 3. optionally look at the intermediate image
    if step % 50 == 0:
        display_sample(sample, step)

In [None]:
# Load a DDIMScheduler from the same checkpoint; it replaces the DDPM scheduler
# bound to `scheduler` for the cells that follow.
from diffusers import DDIMScheduler

# from_pretrained() is the supported way to load a scheduler from a repo id;
# passing a repo id to from_config() is deprecated in the pinned diffusers release.
scheduler = DDIMScheduler.from_pretrained(repo_id)

In [None]:
# Configure the DDIM scheduler for 50 inference steps; the loop below iterates over scheduler.timesteps.
scheduler.set_timesteps(num_inference_steps=50)

In [None]:
# Same denoising loop as before, now driven by the current scheduler's timesteps;
# the intermediate sample is shown every 10 steps.
import tqdm

sample = noisy_sample

progress = tqdm.tqdm(scheduler.timesteps)
for step, t in enumerate(progress, start=1):
    # 1. predict the noise residual (inference only, so no gradient tracking)
    with torch.no_grad():
        residual = model(sample=sample, timestep=t).sample

    # 2. compute the previous image and set x_t -> x_t-1
    sample = scheduler.step(
        model_output=residual, timestep=t, sample=sample
    ).prev_sample

    # 3. optionally look at the intermediate image
    if step % 10 == 0:
        display_sample(sample, step)