In [1]:
import numpy as np
import re
import torch
import PIL
import argparse
import os
import sys
sys.path.insert(0, '/data2/mhf/DXL/Lingxiao/Codes/HypDiffusion')
from torchvision.utils import make_grid
from tqdm import tqdm, trange
from omegaconf import OmegaConf
from PIL import Image
from itertools import islice
from einops import rearrange, repeat
from torch import autocast
from contextlib import nullcontext
from pytorch_lightning import seed_everything
import cv2
import time
from ldm.util import instantiate_from_config
from ldm.models.diffusion.plms import PLMSSampler
from ldm.models.diffusion.ddim_org import DDIMSampler
from ldm.models.diffusion.dpm_solver_org import DPMSolverSampler

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

  from .autonotebook import tqdm as notebook_tqdm


## Define neccessary functions

In [2]:
from notebook_utils.pmath import *
# we utilize geoopt package for hyperbolic calculation
import geoopt.manifolds.stereographic.math as gmath
def chunk(it, size):
    it = iter(it)
    return iter(lambda: tuple(islice(it, size)), ())


def load_model_from_config(config, ckpt, verbose=False):
    print(f"Loading model from {ckpt}")
    pl_sd = torch.load(ckpt, map_location="cpu")
    if "global_step" in pl_sd:
        print(f"Global Step: {pl_sd['global_step']}")
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)
    m, u = model.load_state_dict(sd, strict=False)
    if len(m) > 0 and verbose:
        print("missing keys:")
        print(m)
    if len(u) > 0 and verbose:
        print("unexpected keys:")
        print(u)

    model.cuda()
    model.eval()
    return model


def load_img(path, size=[256, 256]):
    image = cv2.imread(path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    w, h = image.shape[:2]
    print(f"loaded input image of size ({w}, {h}) from {path}")
    # resize to integer multiple of 32
    # w, h = map(lambda x: x - x % 32, (w, h))
    w, h = size
    image = cv2.resize(image, (w, h), interpolation=cv2.INTER_LANCZOS4)
    image = np.array(image).astype(np.uint8)

    image = (image / 127.5 - 1.0).astype(np.float32)
    image = image[None].transpose(0, 3, 1, 2)
    image = torch.from_numpy(image)
    return image


def load_model_and_get_prompt_embedding(model, scale, n_samples, device, prompts, inv=False):

    if inv:
        inv_emb = model.get_learned_conditioning(prompts, inv)
        c = uc = inv_emb
    else:
        inv_emb = None

    if scale != 1.0:
        uc = model.get_learned_conditioning(
            n_samples * [torch.zeros((1, 3, 224, 224))])
    else:
        uc = None
    c = model.get_learned_conditioning(prompts)

    return c, uc, inv_emb

def get_hyp_codes(model, prompts):
    # return latent codes in the hyperbolic space for the given prompts
    logits, feature_dist, feature_euc = model.get_learned_conditioning(prompts)
    return feature_dist

def get_condition_given_hyp_codes(model, hyp_codes):
    # return latent codes in the CLIP space for the given latent codes in hyperbolic space
    logits, feature_dist, feature_euc = model.get_learned_conditioning(hyp_codes, input_feature=True)
    return feature_euc


# rescale function


def rescale(target_radius, x):
    r_change = target_radius / \
        dist0(gmath.mobius_scalar_mul(
            r=torch.tensor(1), x=x, k=torch.tensor(-1.0)))
    return gmath.mobius_scalar_mul(r=r_change, x=x, k=torch.tensor(-1.0))

# function for generating images with fixed radius (also contains raw geodesic images of 'shorten' images, and stretched images to boundary)


def geo_interpolate_fix_r(x, y, interval, target_radius, save_codes=False):
    feature_geo = []
    feature_geo_normalized = []
    images_to_plot_raw_geo = []
    images_to_plot_target_radius = []
    images_to_plot_boundary = []
    dist_to_start = []
    target_radius_ratio = torch.tensor(target_radius/6.2126)
    geodesic_start_short = gmath.mobius_scalar_mul(
        r=target_radius_ratio, x=x, k=torch.tensor(-1.0))
    geodesic_end_short = gmath.mobius_scalar_mul(
        r=target_radius_ratio, x=y, k=torch.tensor(-1.0))
    index = 0
    for i in interval:
        # this is raw image on geodesic, instead of fixed radius
        feature_geo_current = gmath.geodesic(t=torch.tensor(
            i), x=geodesic_start_short, y=geodesic_end_short, k=torch.tensor(-1.0))

        # here we fix the radius and don't revert them now
        r_change = target_radius / \
            dist0(gmath.mobius_scalar_mul(r=torch.tensor(1),
                  x=feature_geo_current, k=torch.tensor(-1.0)))
        feature_geo.append(feature_geo_current)
        feature_geo_current_target_radius = gmath.mobius_scalar_mul(
            r=r_change, x=feature_geo_current, k=torch.tensor(-1.0))
        feature_geo_normalized.append(feature_geo_current_target_radius)
        dist = gmath.dist(
            geodesic_start_short, feature_geo_current_target_radius, k=torch.tensor(-1.0))
        dist_to_start.append(dist)

        # here is to revert the feature to boundary
        r_change_to_boundary = 6.2126 / \
            dist0(gmath.mobius_scalar_mul(r=torch.tensor(1),
                  x=feature_geo_current, k=torch.tensor(-1.0)))
        feature_geo_current_target_boundary = gmath.mobius_scalar_mul(
            r=r_change_to_boundary, x=feature_geo_current, k=torch.tensor(-1.0))

        with torch.no_grad():
            image_raw_geo, _, _, _, _ = net.forward(x=feature_geo_current.unsqueeze(
                0), codes=None, batch_size=1, input_feature=True, input_code=False)
            image, _, _, codes_to_save, _ = net.forward(x=feature_geo_current_target_radius.unsqueeze(
                0), codes=None, batch_size=1, input_feature=True, input_code=False)
            if save_codes:
                torch.save(codes_to_save, os.path.join(
                    '/data2/mhf/DXL/Lingxiao/Codes/hae_editing/save_latent_codes', str(i)+'_'+str(index)+'.pth'))
            index += 1
            image_boundary, _, _, _, _ = net.forward(x=feature_geo_current_target_boundary.unsqueeze(
                0), codes=None, batch_size=1, input_feature=True, input_code=False)
        images_to_plot_raw_geo.append(image_raw_geo)
        images_to_plot_target_radius.append(image)
        images_to_plot_boundary.append(image_boundary)

    return images_to_plot_raw_geo, images_to_plot_target_radius, images_to_plot_boundary, dist_to_start

# function for generating images with fixed radius with optional latent codes list output


def geo_interpolate_fix_r_with_codes(x, y, interval, target_radius):
    # please use this with batch_size = 1
    feature_geo = []
    feature_geo_normalized = []
    images_to_plot_raw_geo = []
    images_to_plot_target_radius = []
    images_to_plot_boundary = []
    dist_to_start = []
    target_radius_ratio = torch.tensor(target_radius/6.2126)
    geodesic_start_short = gmath.mobius_scalar_mul(
        r=target_radius_ratio, x=x, k=torch.tensor(-1.0))
    geodesic_end_short = gmath.mobius_scalar_mul(
        r=target_radius_ratio, x=y, k=torch.tensor(-1.0))
    for i in interval:
        # this is raw image on geodesic, instead of fixed radius
        feature_geo_current = gmath.geodesic(t=torch.tensor(
            i), x=geodesic_start_short, y=geodesic_end_short, k=torch.tensor(-1.0))

        # here we fix the radius and don't revert them now
        r_change = target_radius / \
            dist0(gmath.mobius_scalar_mul(r=torch.tensor(1),
                  x=feature_geo_current, k=torch.tensor(-1.0)))
        feature_geo.append(feature_geo_current)
        feature_geo_current_target_radius = gmath.mobius_scalar_mul(
            r=r_change, x=feature_geo_current, k=torch.tensor(-1.0))
        feature_geo_normalized.append(feature_geo_current_target_radius)
        dist = gmath.dist(
            geodesic_start_short, feature_geo_current_target_radius, k=torch.tensor(-1.0))
        dist_to_start.append(dist)
        # print(feature_geo_current_target_radius.norm())

        # here is to revert the feature to boundary
        r_change_to_boundary = 6.2126 / \
            dist0(gmath.mobius_scalar_mul(r=torch.tensor(1),
                  x=feature_geo_current, k=torch.tensor(-1.0)))
        feature_geo_current_target_boundary = gmath.mobius_scalar_mul(
            r=r_change_to_boundary, x=feature_geo_current, k=torch.tensor(-1.0))
        # print(feature_geo_current_target_boundary.norm())

        # now codes do not affect outputs
        with torch.no_grad():
            image_raw_geo, _, _, _, _ = net.forward(x=feature_geo_current.unsqueeze(
                0), codes=None, batch_size=1, input_feature=True, input_code=False)
            image, _, _, codes_target_radius, _ = net.forward(x=feature_geo_current_target_radius.unsqueeze(
                0), codes=None, batch_size=1, input_feature=True, input_code=False)
            image_boundary, _, _, codes_boundary, _ = net.forward(x=feature_geo_current_target_boundary.unsqueeze(
                0), codes=None, batch_size=1, input_feature=True, input_code=False)
        images_to_plot_raw_geo.append(image_raw_geo)
        images_to_plot_target_radius.append(image)
        images_to_plot_boundary.append(image_boundary)

    return images_to_plot_raw_geo, images_to_plot_target_radius, images_to_plot_boundary, dist_to_start, [codes_target_radius, codes_boundary, feature_geo_current_target_radius, feature_geo_current_target_boundary]

# the function defines easy perturbation on any given radius


def generate_perturbation_r_with_raw_inv(x, target_radius, interval, seed, size, save_codes=False):
    # 3 arguments, raw image feature, target radius and interval(actually the ratio).
    images_perturbed = []
    dist_perturbed = []
    torch.manual_seed(seed=seed)
    perturb = torch.rand(6, 512).cuda()
    for i in range(size):
        target_rad_perturb = 6.2126
        ratio = target_rad_perturb/dist0(perturb[i])
        perturb_current = gmath.mobius_scalar_mul(
            r=ratio, x=perturb[i], k=torch.tensor(-1.0))
        _, images_to_plot_target_radius, _, dist_to_start = geo_interpolate_fix_r(
            x=x, y=perturb_current, interval=interval, target_radius=target_radius, save_codes=save_codes)
        print(dist_to_start)
        dist_perturbed.append(dist_to_start[0])
        images_perturbed.append(images_to_plot_target_radius[0])

    raw_image, _, _, _ = geo_interpolate_fix_r(x=x, y=perturb_current, interval=[
                                               0], target_radius=target_radius)
    images_perturbed.insert(0, raw_image[0])
    return images_perturbed, dist_perturbed

# this further function allows using specific target image as perturbation


def generate_perturbation_r_with_raw_inv_pick(x, y, target_radius, interval, seed, save_codes=False):
    # 3 arguments, raw image feature, target radius and interval(actually the ratio).
    images_perturbed = []
    dist_perturbed = []
    torch.manual_seed(seed=seed)
    # perturb = torch.rand(6,512).cuda()
    perturb = y
    for i in range(len(y)):
        target_rad_perturb = 6.2126
        ratio = target_rad_perturb/dist0(perturb[i])
        perturb_current = gmath.mobius_scalar_mul(
            r=ratio, x=perturb[i], k=torch.tensor(-1.0))

        if False:
            with torch.no_grad():
                image, _, _, _, _ = net.forward(x=perturb_current.unsqueeze(
                    0), codes=None, batch_size=1, input_feature=True, input_code=False)

            fig = plt.figure(figsize=(5, 5))
            gs = fig.add_gridspec(1, 1)
            for i in range(1):
                if i == 0:
                    fig.add_subplot(gs[0, i])
                    plt.axis('off')
                    plt.title(f'perturb = {target_rad_perturb}')
                    plt.imshow(tensor2im(image.squeeze(0)))

        # interval = [0.42]
        _, images_to_plot_target_radius, _, dist_to_start = geo_interpolate_fix_r(
            x=x, y=perturb_current, interval=interval, target_radius=target_radius, save_codes=save_codes)
        print(dist_to_start)
        dist_perturbed.append(dist_to_start[0])
        images_perturbed.append(images_to_plot_target_radius[0])

    raw_image, _, _, _ = geo_interpolate_fix_r(x=x, y=perturb_current, interval=[
                                               0], target_radius=target_radius, save_codes=save_codes)
    images_perturbed.insert(0, raw_image[0])
    return images_perturbed, dist_perturbed

In [3]:
# define variables
init_image_path = './inputs/same_domain_test/123/test3.jpg'
ref_image_path = './inputs/same_domain_test/123/test3.jpg'
prompt = 'a image of a golden retriever'
outdir = './outputs'
skip_grid = False
skip_save = True
ddim_steps = 50
ddim_eta = 0.0
n_iter = 1
C = 4
f = 8
n_samples = 20
n_rows = 0
scale = 7.5
strength = 0.95
config_path = './configs/stable-diffusion/v2_inference_text.yaml'
ckpt = '/data2/mhf/DXL/Lingxiao/Codes/Paint-by-Example-test/models/Paint-by-Example/animal_faces/2024-09-11T11-30-12_v1/checkpoints/epoch=000035.ckpt'
seed = 3408
precision = 'autocast'

In [4]:
seed_everything(seed)

config = OmegaConf.load(f"{config_path}")
model = load_model_from_config(config, f"{ckpt}")

device = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)
print("Successfully loaded model!")

sampler = DDIMSampler(model)


Global seed set to 3408


Loading model from /data2/mhf/DXL/Lingxiao/Codes/Paint-by-Example-test/models/Paint-by-Example/animal_faces/2024-09-11T11-30-12_v1/checkpoints/epoch=000035.ckpt
Global Step: 77724
No module 'xformers'. Proceeding without it.
LatentDiffusion: Running in eps-prediction mode
DiffusionWrapper has 865.91 M params.
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla' with 512 in_channels


Some weights of the model checkpoint at /data2/mhf/DXL/Lingxiao/Cache/huggingface/hub/models--openai--clip-vit-large-patch14 were not used when initializing CLIPTextModel: ['vision_model.encoder.layers.3.layer_norm2.bias', 'vision_model.encoder.layers.11.self_attn.out_proj.weight', 'vision_model.encoder.layers.12.layer_norm2.weight', 'vision_model.encoder.layers.11.self_attn.k_proj.bias', 'vision_model.pre_layrnorm.bias', 'vision_model.encoder.layers.22.mlp.fc1.bias', 'vision_model.encoder.layers.20.self_attn.v_proj.bias', 'vision_model.encoder.layers.8.layer_norm2.weight', 'vision_model.encoder.layers.1.self_attn.k_proj.weight', 'vision_model.encoder.layers.18.layer_norm1.bias', 'vision_model.encoder.layers.9.self_attn.out_proj.weight', 'vision_model.encoder.layers.6.layer_norm1.weight', 'vision_model.encoder.layers.16.mlp.fc1.weight', 'vision_model.encoder.layers.14.layer_norm2.weight', 'vision_model.encoder.layers.9.self_attn.v_proj.bias', 'vision_model.encoder.layers.14.self_attn.v

Use hyperbolic: True
Loading HAE from checkpoint: /data2/mhf/DXL/Lingxiao/Codes/hyperediting/exp_out/hyper_styleGANinversion_animalfaces_512_5_30_init_v2/checkpoints/iteration_11000.pt
Successfully loaded model!


In [5]:
os.makedirs(outdir, exist_ok=True)
outpath = outdir

batch_size = n_samples
n_rows = n_rows if n_rows > 0 else batch_size

sample_path = os.path.join(outpath, "samples")
os.makedirs(sample_path, exist_ok=True)
base_count = len(os.listdir(sample_path))
grid_count = len(os.listdir(outpath)) - 1

In [6]:
# load prompt
data = []
for i in range(batch_size):
    data.append([prompt])

# load images
# load init image
assert os.path.isfile(init_image_path)
init_image = load_img(init_image_path, [512, 512]).to(device)
init_image_resized = load_img(init_image_path, [224, 224]).to(device)
init_image = repeat(init_image, '1 ... -> b ...', b=batch_size)
init_image_resized = repeat(init_image_resized, '1 ... -> b ...', b=batch_size)
init_latent = model.get_first_stage_encoding(
    model.encode_first_stage(init_image))  # move to latent space

# load ref image
assert os.path.isfile(ref_image_path)
ref_image = load_img(ref_image_path, [224, 224]).to(device)
ref_image = repeat(ref_image, '1 ... -> b ...', b=batch_size)

sampler.make_schedule(ddim_num_steps=ddim_steps,
                        ddim_eta=ddim_eta, verbose=False)

assert 0. <= strength <= 1., 'can only work with strength in [0.0, 1.0]'
t_enc = int(strength * ddim_steps)
print(f"target t_enc is {t_enc} steps")

loaded input image of size (87, 103) from ./inputs/same_domain_test/123/test3.jpg
loaded input image of size (87, 103) from ./inputs/same_domain_test/123/test3.jpg
loaded input image of size (87, 103) from ./inputs/same_domain_test/123/test3.jpg
target t_enc is 47 steps


## Manipulate latent codes in Hyperbolic space

In [7]:
print(data)
hyp_code = get_hyp_codes(model, data[0])
print(hyp_code.shape)
print(gmath.dist0(hyp_code, k=torch.tensor(-1.0)))
# this is used for generating figure of varying radius in our paper
rescaled_codes = []
target_radii = [6.2126, 4, 2.5, 1, 0.5, 0]
for i in target_radii:
    hyp_code_rescaled = rescale(i, hyp_code)
    hyp_code_rescaled = repeat(hyp_code_rescaled, '1 ... -> b ...', b=batch_size)
    feature_euc = get_condition_given_hyp_codes(model, hyp_code_rescaled)
    rescaled_codes.append(feature_euc)
    

[['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever'], ['a image of a golden retriever']]
torch.Size([1, 77, 768])
torch.Size([1, 768])


RuntimeError: Given normalized_shape=[1024], expected input with shape [*, 1024], but got input of size[1, 1, 1, 768]

## Sample Images

In [7]:
precision_scope = autocast if precision == "autocast" else nullcontext
with torch.no_grad():
    with precision_scope("cuda"):
        with model.ema_scope():
            tic = time.time()
            all_samples = list()
            shape = [C, 64, 64]
            # encode (scaled latent)
            if strength < 1.0:
                z_enc = sampler.stochastic_encode(init_latent, torch.tensor([t_enc]*batch_size).to(device))
            # print(f"z_enc shape: {z_enc.shape}")
            else:
                z_enc = torch.randn([n_samples, 4, 64, 64], device=device)
            # decode it
            for c in rescaled_codes:
                samples_ddim, _ = sampler.sample(S=ddim_steps,
                                                    conditioning=c,
                                                    batch_size=n_samples,
                                                    shape=shape,
                                                    verbose=False,
                                                    unconditional_guidance_scale=scale,
                                                    unconditional_conditioning=c,
                                                    eta=ddim_eta,
                                                    x_T=z_enc)
                '''
                samples_ddim = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=scale,
                                            unconditional_conditioning=uc,)
                                            '''

                x_samples = model.decode_first_stage(samples_ddim)
                x_samples = torch.clamp(
                    (x_samples + 1.0) / 2.0, min=0.0, max=1.0)

                if not skip_save:
                    for x_sample in x_samples:
                        x_sample = 255. * \
                            rearrange(x_sample.cpu().numpy(),
                                        'c h w -> h w c')
                        Image.fromarray(x_sample.astype(np.uint8)).save(
                            os.path.join(sample_path, f"{base_count:05}.png"))
                        base_count += 1
                all_samples.append(x_samples)

                if not skip_grid:
                    # additionally, save as grid
                    grid = torch.stack(all_samples, 0)
                    grid = rearrange(grid, 'n b c h w -> (n b) c h w')
                    grid = make_grid(grid, nrow=n_rows)

            # to image
            grid = 255. * \
                rearrange(grid, 'c h w -> h w c').cpu().numpy()
            Image.fromarray(grid.astype(np.uint8)).save(
                os.path.join(outpath, f'grid-{grid_count:04}.png'))
            grid_count += 1
            del grid

        toc = time.time()

print(f"Your samples are ready and waiting for you here: \n{outpath} \n"
        f" \nEnjoy.")

NameError: name 'rescaled_codes' is not defined

## Mean of the variation

In [27]:
precision_scope = autocast if precision == "autocast" else nullcontext
with torch.no_grad():
    with precision_scope("cuda"):
        with model.ema_scope():
            tic = time.time()
            all_samples = list()
            shape = [C, 64, 64]
            # encode (scaled latent)
            if strength < 1.0:
                z_enc = sampler.stochastic_encode(
                    init_latent, torch.tensor([t_enc]*batch_size).to(device))
            # print(f"z_enc shape: {z_enc.shape}")
            else:
                z_enc = torch.randn([n_samples, 4, 64, 64], device=device)
            # decode it
            for c in rescaled_codes:
                samples_ddim, _ = sampler.sample(S=ddim_steps,
                                                 conditioning=c,
                                                 batch_size=n_samples,
                                                 shape=shape,
                                                 verbose=False,
                                                 unconditional_guidance_scale=scale,
                                                 unconditional_conditioning=c,
                                                 eta=ddim_eta,
                                                 x_T=z_enc)
                '''
                samples_ddim = sampler.decode(z_enc, c, t_enc, unconditional_guidance_scale=scale,
                                            unconditional_conditioning=uc,)
                                            '''

                x_samples = model.decode_first_stage(samples_ddim)
                x_samples = torch.clamp(
                    (x_samples + 1.0) / 2.0, min=0.0, max=1.0)
                print(x_samples.shape)
                x_samples_mean = x_samples.mean(0).unsqueeze(0)
                if not skip_save:
                    for x_sample in x_samples:
                        x_sample = 255. * \
                            rearrange(x_sample.cpu().numpy(),
                                      'c h w -> h w c')
                        Image.fromarray(x_sample.astype(np.uint8)).save(
                            os.path.join(sample_path, f"{base_count:05}.png"))
                        base_count += 1
                all_samples.append(x_samples_mean)

                if not skip_grid:
                    # additionally, save as grid
                    grid = torch.stack(all_samples, 0)
                    grid = rearrange(grid, 'n b c h w -> (n b) c h w')
                    grid = make_grid(grid, nrow=n_rows)

            # to image
            grid = 255. * \
                rearrange(grid, 'c h w -> h w c').cpu().numpy()
            Image.fromarray(grid.astype(np.uint8)).save(
                os.path.join(outpath, f'grid-{grid_count:04}.png'))
            grid_count += 1
            del grid

        toc = time.time()

print(f"Your samples are ready and waiting for you here: \n{outpath} \n"
      f" \nEnjoy.")

Data shape for DDIM sampling is (20, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████| 50/50 [00:32<00:00,  1.53it/s]


torch.Size([20, 3, 512, 512])
Data shape for DDIM sampling is (20, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s]


torch.Size([20, 3, 512, 512])
Data shape for DDIM sampling is (20, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s]


torch.Size([20, 3, 512, 512])
Data shape for DDIM sampling is (20, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s]


torch.Size([20, 3, 512, 512])
Data shape for DDIM sampling is (20, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s]


torch.Size([20, 3, 512, 512])
Data shape for DDIM sampling is (20, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps


DDIM Sampler: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s]


torch.Size([20, 3, 512, 512])
Your samples are ready and waiting for you here: 
./outputs 
 
Enjoy.
