In [1]:
! nvidia-smi

Mon Sep 19 03:33:30 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0    60W / 400W |      0MiB / 40536MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A100-SXM...  Off  | 00000000:00:05.0 Off |                    0 |
| N/A   36C    P0    59W / 400W |      0MiB / 40536MiB |      0%      Default |
|       

In [19]:
from PIL import Image
from diffusers import AutoencoderKL, DDPMScheduler, PNDMScheduler, StableDiffusionPipeline, UNet2DConditionModel
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
from pathlib import Path
import torch
from multiprocessing import Pool


In [3]:

# Default batch size and a placeholder prompt batch (one empty prompt per
# sample). Later cells rebind `prompt` before any generation is run.
num_samples = 5
prompt = ["" for _ in range(num_samples)]


In [4]:
def image_grid(imgs, rows, cols):
    """Tile ``imgs`` row by row into a single ``rows`` x ``cols`` RGB sheet.

    Args:
        imgs: Sequence of images; it must hold exactly ``rows * cols`` items.
            The ``size`` of the first image is used as the cell size for
            every position (assumes all images share that size -- TODO
            confirm with callers).
        rows: Number of rows in the sheet.
        cols: Number of columns in the sheet.

    Returns:
        A new RGB image of size ``(cols * w, rows * h)`` with every input
        pasted into its cell, filled left to right and top to bottom.

    Raises:
        AssertionError: If ``len(imgs)`` differs from ``rows * cols``.
    """
    assert len(imgs) == rows*cols

    cell_w, cell_h = imgs[0].size
    sheet = Image.new('RGB', size=(cols*cell_w, rows*cell_h))

    for position, tile in enumerate(imgs):
        # position -> (row, col) in row-major order
        row, col = divmod(position, cols)
        sheet.paste(tile, box=(col*cell_w, row*cell_h))
    return sheet

In [8]:
# Hugging Face auth token passed to the hub downloads below.
# NOTE(review): this looks like a placeholder value; prefer supplying the real
# token from the environment rather than committing it to the notebook.
TOKEN = "AUTH_TOKEN"
def load_pipeline(step: int):
    """Build the Stable Diffusion pipeline for a given fine-tuning step.

    Args:
        step: Number of fine-tuning steps of the checkpoint to load. ``0``
            selects the original ``CompVis/stable-diffusion-v1-4`` pipeline.

    Returns:
        A ``StableDiffusionPipeline``. For ``step != 0`` the text encoder, VAE
        and UNet come from the ``step-{step}`` checkpoint directory, combined
        with the base model's tokenizer, a freshly configured PNDM scheduler
        and the ``openai/clip-vit-base-patch32`` feature extractor.
    """
    if step == 0:
        # Untouched base model straight from the hub.
        return StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_auth_token=TOKEN)

    # Fine-tuned weights written by the training run on the Google Cloud
    # compute engine disk.
    checkpoint_dir = f"/user/disks/sdc/runs/finetune_cocomelon/step-{step}"
    finetuned_parts = {
        "text_encoder": CLIPTextModel.from_pretrained(checkpoint_dir, subfolder="text_encoder"),
        "vae": AutoencoderKL.from_pretrained(checkpoint_dir, subfolder="vae"),
        "unet": UNet2DConditionModel.from_pretrained(checkpoint_dir, subfolder="unet"),
    }

    # The tokenizer is always taken from the base model.
    base_tokenizer = CLIPTokenizer.from_pretrained(
        "CompVis/stable-diffusion-v1-4", subfolder="tokenizer", use_auth_token=TOKEN
    )
    noise_scheduler = PNDMScheduler(
        beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", skip_prk_steps=True
    )
    clip_features = CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32")

    # NOTE(review): no safety_checker is passed here (it was deliberately left
    # out); confirm the installed diffusers version accepts a pipeline
    # constructed without one.
    return StableDiffusionPipeline(
        tokenizer=base_tokenizer,
        scheduler=noise_scheduler,
        feature_extractor=clip_features,
        **finetuned_parts,
    )

In [15]:
def save_grid(grid, first_step, last_step, prompt, root_dir=None):
    """Save ``grid`` as a PNG under a name that does not overwrite older runs.

    The file is named ``{prompt}_step_{first_step}_{last_step}_{counter}.png``
    where ``counter`` starts at 1 and is incremented until an unused file name
    is found.

    Args:
        grid: Object exposing ``save(path)`` (e.g. the image returned by
            ``image_grid``).
        first_step: First fine-tuning step shown in the grid (file name only).
        last_step: Last fine-tuning step shown in the grid (file name only).
        prompt: Prompt text embedded in the file name.
        root_dir: Directory to write into. Defaults to the notebook's data
            directory when ``None``. It is created if it does not exist.
    """
    if root_dir is None:
        root_dir = "/home/user/thumbnail-stable-diffusion/data"
    root_dir = Path(root_dir)
    # Without this, grid.save() fails on a fresh machine where the data
    # directory has not been created yet.
    root_dir.mkdir(parents=True, exist_ok=True)

    counter = 0
    while True:
        counter += 1
        path = root_dir / f"{prompt}_step_{first_step}_{last_step}_{counter}.png"
        if not path.exists():
            grid.save(str(path))
            break

In [16]:
def generate_samples(steps, prompt):
    """Sample every checkpoint in ``steps`` and save the results as one grid.

    Each checkpoint is loaded with ``load_pipeline``, moved to ``"cuda"`` and
    run once on the whole ``prompt`` batch. The grid has one row per entry of
    ``steps`` and one column per entry of ``prompt``.

    Args:
        steps: Sequence of fine-tuning step numbers, in row order.
        prompt: List of prompt strings passed to the pipeline as one batch;
            its first entry is also used in the output file name.
    """
    def _sample(checkpoint_step):
        # Load one checkpoint onto the GPU and run the prompt batch through it.
        pipeline = load_pipeline(checkpoint_step)
        pipeline.to("cuda")
        return pipeline(prompt).images

    collected = []
    for checkpoint_step in steps:
        collected += _sample(checkpoint_step)

    n_rows, n_cols = len(steps), len(prompt)
    sheet = image_grid(collected, rows=n_rows, cols=n_cols)
    save_grid(sheet, steps[0], steps[-1], prompt[0])

In [18]:
# Compare the base model (step 0) against the step-5000 checkpoint, with five
# samples of the same prompt per row.
prompt = ["Birthday at the farm song with cocomelon" for _ in range(5)]
steps = list(range(0, 10000, 5000))
generate_samples(steps, prompt)

{'trained_betas'} was not found in config. Values will be initialized to default values.
100%|██████████| 51/51 [00:22<00:00,  2.23it/s]
100%|██████████| 51/51 [00:22<00:00,  2.23it/s]


In [22]:
# Let's try to make generation faster by running the checkpoints on multiple GPUs in parallel.


def generate_sample_single_step_single_gpu(gpu_idx, step, prompt):
    """Generate samples for one checkpoint on one specific GPU.

    Args:
        gpu_idx: Index of the CUDA device to run on; the pipeline is moved to
            ``cuda:{gpu_idx}``.
        step: Number of fine-tuning steps of the checkpoint to load (``0`` is
            the original model); forwarded to ``load_pipeline``.
        prompt: List of prompt strings passed to the pipeline as one batch.

    Returns:
        A ``(images, step)`` tuple, where ``images`` is the ``.images`` list
        produced by the pipeline and ``step`` is echoed back so the caller can
        order results coming from several workers.
    """
    device = f"cuda:{gpu_idx}"

    # Reuse the shared loader instead of duplicating the model-assembly code.
    pipe = load_pipeline(step)

    # Move to the GPU assigned to this worker. Using plain "cuda" here would
    # put every worker on the default device and defeat the multi-GPU split.
    pipe.to(device)
    images = pipe(prompt).images
    return (images, step)


def generate_samples_multi_gpu(steps, prompt):
    """Sample every checkpoint in ``steps`` in parallel and save one grid.

    One worker process is started per entry of ``steps``; worker ``i`` is
    handed GPU index ``i`` and runs
    ``generate_sample_single_step_single_gpu``. The resulting grid has one
    row per checkpoint (ordered by step number) and one column per prompt.

    Args:
        steps: Non-empty sequence of fine-tuning step numbers. Assumes there
            are at least ``len(steps)`` GPUs available -- TODO confirm.
        prompt: Non-empty list of prompt strings; its first entry is also
            used in the output file name.

    Raises:
        ValueError: If ``steps`` or ``prompt`` is empty.
    """
    if len(steps) == 0:
        raise ValueError("steps must contain at least one fine-tuning step")
    if len(prompt) == 0:
        raise ValueError("prompt must contain at least one prompt string")

    jobs = [(gpu_idx, step, prompt) for gpu_idx, step in enumerate(steps)]

    # A pool with one process per checkpoint; each process calls
    # generate_sample_single_step_single_gpu and sends its samples back.
    # NOTE(review): if CUDA was already initialised in this kernel (e.g. by an
    # earlier generate_samples call), fork-started workers may be unable to
    # use CUDA -- confirm the start method or run from a fresh kernel.
    with Pool(processes=len(jobs)) as pool:
        results = pool.starmap(generate_sample_single_step_single_gpu, jobs)

    results = sorted(results, key=lambda item: item[1])

    # Each result holds a *list* of images; flatten them so image_grid gets
    # rows * cols individual images rather than one list per row.
    tot_images = [image for images, _ in results for image in images]

    grid = image_grid(tot_images, rows=len(steps), cols=len(prompt))

    save_grid(grid, steps[0], steps[-1], prompt[0])

In [None]:
# Same base-vs-step-5000 comparison as before, but with one worker process
# (and GPU index) per checkpoint.
steps = list(range(0, 10000, 5000))
prompt = ["playing with baloons" for _ in range(5)]
generate_samples_multi_gpu(steps, prompt)