In [1]:
import json
import cv2
import numpy as np

from torch.utils.data import Dataset

In [3]:
# Folder holding the fill50k training files, and the prompt file located directly inside it.
dataset_path = './training/fill50k/'
prompt_json = dataset_path + 'prompt.json'

In [None]:
# Example line from prompt.json (the file holds one JSON object per line):
# {"source": "source/0.png", "target": "target/0.png", "prompt": "pale golden rod circle with old lace background"}


In [4]:

class MyDataset(Dataset):
    """Image/prompt dataset driven by a JSON-lines prompt file.

    Every non-blank line of the prompt file is a JSON object with the keys
    ``source``, ``target`` and ``prompt``. ``source`` and ``target`` are image
    paths relative to the dataset root.

    Args:
        prompt_path: Path of the JSON-lines prompt file. When omitted, the
            notebook-level ``prompt_json`` is used.
        root: String prepended verbatim to every relative image path, so it
            must end with a path separator. When omitted, the notebook-level
            ``dataset_path`` is used.
    """

    def __init__(self, prompt_path=None, root=None):
        # Fall back to the notebook globals so a bare ``MyDataset()`` keeps working.
        self.prompt_path = prompt_json if prompt_path is None else prompt_path
        self.root = dataset_path if root is None else root

        self.data = []
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(self.prompt_path, 'rt', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Skip blank lines instead of failing inside json.loads.
                    continue
                self.data.append(json.loads(line))

    def __len__(self):
        """Return the number of prompt entries that were loaded."""
        return len(self.data)

    def _load_rgb(self, relative_path):
        """Read the image at ``self.root + relative_path`` and return it in RGB order.

        Raises:
            FileNotFoundError: If OpenCV could not read the file.
        """
        path = self.root + relative_path
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError(f'Could not read image: {path}')
        # Do not forget that OpenCV reads images in BGR order.
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def __getitem__(self, idx):
        """Return one sample as ``dict(jpg=target, txt=prompt, hint=source)``.

        ``hint`` is the source image scaled to [0, 1] and ``jpg`` is the
        target image scaled to [-1, 1], both as float32 arrays.
        """
        item = self.data[idx]

        source_filename = item['source']
        target_filename = item['target']
        prompt = item['prompt']

        source = self._load_rgb(source_filename)
        target = self._load_rgb(target_filename)

        # Normalize source images to [0, 1].
        source = source.astype(np.float32) / 255.0

        # Normalize target images to [-1, 1].
        target = (target.astype(np.float32) / 127.5) - 1.0

        return dict(jpg=target, txt=prompt, hint=source)


In [13]:
import random

# Smoke test: build the dataset and look at one randomly chosen sample.
dataset = MyDataset()
print(len(dataset))

sample_idx = random.randint(0, len(dataset) - 1)
item = dataset[sample_idx]
jpg, txt, hint = item['jpg'], item['txt'], item['hint']

# Prompt text first, then the shapes of the target and hint arrays.
print(txt, jpg.shape, hint.shape, sep='\n')


50000
tan circle with pale green background
(512, 512, 3)
(512, 512, 3)


In [18]:
# Add Control: checkpoint paths for the weight-transfer step.
INPUT_MODEL = './models/v1-5-pruned.ckpt'        # existing checkpoint that is read
OUTPUT_MODEL = './models/control_sd15_ini.ckpt'  # checkpoint that is written

In [20]:
import os


# Validate the checkpoint paths before the model is built.
assert os.path.exists(INPUT_MODEL), 'Input model does not exist.'
# Refuse to overwrite an existing output checkpoint.
assert not os.path.exists(OUTPUT_MODEL), 'Output filename already exists.'
# os.path.dirname() returns '' for a bare filename; treat that as the current directory
# so an output file in the working directory is not rejected.
assert os.path.exists(os.path.dirname(OUTPUT_MODEL) or '.'), 'Output path is not valid.'

import torch
from share import *
from cldm.model import create_model


def get_node_name(name, parent_name):
    """Split ``parent_name`` off the front of ``name``.

    Returns:
        ``(True, remainder)`` when ``name`` starts with ``parent_name`` and is
        strictly longer than it, otherwise ``(False, '')``.
    """
    prefix_len = len(parent_name)
    # An exact match (nothing left after the prefix) does not count as a child.
    if len(name) > prefix_len and name.startswith(parent_name):
        return True, name[prefix_len:]
    return False, ''


# Instantiate the model described by the config; its state dict defines which keys must be filled.
model = create_model(config_path='./models/cldm_v15.yaml')

# Map every tensor to the CPU so the checkpoint can be opened on a machine that lacks the
# device it was saved from.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
pretrained_weights = torch.load(INPUT_MODEL, map_location='cpu')
if 'state_dict' in pretrained_weights:
    # Some checkpoints nest the weights under a 'state_dict' entry.
    pretrained_weights = pretrained_weights['state_dict']

scratch_dict = model.state_dict()

target_dict = {}
for k in scratch_dict:
    # Keys of the form 'control_<rest>' are looked up as 'model.diffusion_<rest>' in the checkpoint.
    is_control, name = get_node_name(k, 'control_')
    copy_k = 'model.diffusion_' + name if is_control else k
    if copy_k in pretrained_weights:
        target_dict[k] = pretrained_weights[copy_k].clone()
    else:
        # No counterpart in the checkpoint: keep the value the model was created with.
        target_dict[k] = scratch_dict[k].clone()
        print(f'These weights are newly added: {k}')

# strict=True makes a missing or unexpected key an error instead of a silent skip.
model.load_state_dict(target_dict, strict=True)
torch.save(model.state_dict(), OUTPUT_MODEL)
print('Done.')


ControlLDM: Running in eps-prediction mode
DiffusionWrapper has 859.52 M params.
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla' with 512 in_channels
Loaded model config from [./models/cldm_v15.yaml]
These weights are newly added: logvar
These weights are newly added: control_model.zero_convs.0.0.weight
These weights are newly added: control_model.zero_convs.0.0.bias
These weights are newly added: control_model.zero_convs.1.0.weight
These weights are newly added: control_model.zero_convs.1.0.bias
These weights are newly added: control_model.zero_convs.2.0.weight
These weights are newly added: control_model.zero_convs.2.0.bias
These weights are newly added: control_model.zero_convs.3.0.weight
These weights are newly added: control_model.zero_convs.3.0.bias
These weights are newly added: control_model.zero_convs.4.0.weight
These weights are newly added: control_model.zero_convs.4.0.bias
T

In [24]:
import pytorch_lightning as pl
from torch.utils.data import DataLoader
from tutorial_dataset import MyDataset
from cldm.logger import ImageLogger
from cldm.model import create_model, load_state_dict


# Training configuration
resume_path = OUTPUT_MODEL
batch_size = 2
logger_freq = 300
learning_rate = 1e-5
sd_locked = True
only_mid_control = False

# NOTE(review): `accumulate_grad_batches=4` was left commented out here and is not applied.

# Build the model and load the checkpoint on the CPU first; per the original note,
# PyTorch Lightning moves the model to the GPUs itself.
model = create_model('./models/cldm_v15.yaml')
model = model.cpu()
model.load_state_dict(load_state_dict(resume_path, location='cpu'))

# Copy the training options onto the model object.
model.learning_rate = learning_rate
model.sd_locked = sd_locked
model.only_mid_control = only_mid_control


# Data pipeline, image-logging callback and trainer
dataset = MyDataset()
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
logger = ImageLogger(batch_frequency=logger_freq)
trainer = pl.Trainer(
    gpus=2,
    precision=32,
    check_val_every_n_epoch=100,
    callbacks=[logger],
)


# Train!
trainer.fit(model, dataloader)


ControlLDM: Running in eps-prediction mode
DiffusionWrapper has 859.52 M params.
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 4, 32, 32) = 4096 dimensions.
making attention of type 'vanilla' with 512 in_channels
Loaded model config from [./models/cldm_v15.yaml]
Loaded state_dict from [./models/control_sd15_ini.ckpt]


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


No module 'xformers'. Proceeding without it.


initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/2


No module 'xformers'. Proceeding without it.


initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/2
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 2 processes
----------------------------------------------------------------------------------------------------


  | Name              | Type               | Params
---------------------------------------------------------
0 | model             | DiffusionWrapper   | 859 M 
1 | first_stage_model | AutoencoderKL      | 83.7 M
2 | cond_stage_model  | FrozenCLIPEmbedder | 123 M 
3 | control_model     | ControlNet         | 361 M 
---------------------------------------------------------
1.2 B     Trainable params
206 M     Non-trainable params
1.4 B     Total params
5,710.058 Total estimated model params size (MB)
  rank_zero_warn(


Epoch 0:   0%|          | 0/12500 [00:00<?, ?it/s] 

  return F.conv2d(input, weight, bias, self.stride,
  return F.conv2d(input, weight, bias, self.stride,


Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   2%|▏         | 1/50 [00:00<00:29,  1.66it/s]
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:22,  2.13it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:20,  2.35it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:18,  2.46it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:02<00:17,  2.52it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:17,  2.56it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.59it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.60it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.62it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.62it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:14,  2.64it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.64it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.64it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:13,  2.65it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:05<00:13,  2.63it/s][A
DDIM Sampler:  32%|███▏      | 16/50 

Epoch 0:   2%|▏         | 300/12500 [14:36<9:53:53,  2.92s/it, loss=0.00775, v_num=3, train/loss_simple_step=0.00157, train/loss_vlb_step=6.34e-6, train/loss_step=0.00157, global_step=299.0]   Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.62it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.63it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.63it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.63it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:16,  2.62it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.62it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.62it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.62it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.62it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:14,  2.63it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.62it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:04<00:14,  2.62it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:13,  2.62it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:   5%|▍         | 600/12500 [29:10<9:38:42,  2.92s/it, loss=0.00769, v_num=3, train/loss_simple_step=0.00665, train/loss_vlb_step=4.89e-5, train/loss_step=0.00665, global_step=599.0]   Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.66it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.62it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.63it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.63it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:16,  2.62it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.63it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:15,  2.64it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.63it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.62it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:14,  2.62it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.62it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:04<00:14,  2.62it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:13,  2.62it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:   7%|▋         | 900/12500 [43:45<9:24:03,  2.92s/it, loss=0.00476, v_num=3, train/loss_simple_step=0.00146, train/loss_vlb_step=5.74e-6, train/loss_step=0.00146, global_step=899.0]  Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.63it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.66it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.63it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.63it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:16,  2.62it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.62it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.61it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.62it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.62it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:14,  2.62it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.62it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:04<00:14,  2.61it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:13,  2.62it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  10%|▉         | 1200/12500 [58:21<9:09:30,  2.92s/it, loss=0.00406, v_num=3, train/loss_simple_step=0.00361, train/loss_vlb_step=1.28e-5, train/loss_step=0.00361, global_step=1199.0]  Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.66it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.64it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:16,  2.61it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.62it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.62it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.62it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.59it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.59it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.60it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:04<00:14,  2.61it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:13,  2.62it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  12%|█▏        | 1500/12500 [1:12:57<8:54:58,  2.92s/it, loss=0.00541, v_num=3, train/loss_simple_step=0.00967, train/loss_vlb_step=4.95e-5, train/loss_step=0.00967, global_step=1499.0]  Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.62it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.63it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.63it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:16,  2.61it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.62it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.61it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.62it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.62it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.59it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.59it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:04<00:14,  2.60it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.57it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  14%|█▍        | 1800/12500 [1:27:32<8:40:24,  2.92s/it, loss=0.00441, v_num=3, train/loss_simple_step=0.00528, train/loss_vlb_step=2.69e-5, train/loss_step=0.00528, global_step=1799.0]  Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.65it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.62it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.61it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.61it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.61it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:16,  2.62it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.60it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.58it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.57it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.57it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.58it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.58it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.57it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:13,  2.58it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  17%|█▋        | 2100/12500 [1:42:07<8:25:48,  2.92s/it, loss=0.00832, v_num=3, train/loss_simple_step=0.00104, train/loss_vlb_step=5.25e-6, train/loss_step=0.00104, global_step=2099.0]  Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.66it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.62it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.63it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:16,  2.63it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.60it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.57it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.57it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.57it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.57it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.58it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.58it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.57it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  17%|█▋        | 2101/12500 [1:42:30<8:27:23,  2.93s/it, loss=0.00848, v_num=3, train/loss_simple_step=0.00593, train/loss_vlb_step=3.14e-5, train/loss_step=0.00593, global_step=2100.0]

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.55it/s]


Epoch 0:  19%|█▉        | 2400/12500 [1:56:43<8:11:15,  2.92s/it, loss=0.0042, v_num=3, train/loss_simple_step=0.00442, train/loss_vlb_step=2.03e-5, train/loss_step=0.00442, global_step=2399.0]   Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.65it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.64it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.63it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.60it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.59it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:17,  2.59it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.59it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.59it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.58it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.57it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.57it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.57it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.59it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.57it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  19%|█▉        | 2401/12500 [1:57:06<8:12:35,  2.93s/it, loss=0.00391, v_num=3, train/loss_simple_step=0.0105, train/loss_vlb_step=0.000131, train/loss_step=0.0105, global_step=2400.0]

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.56it/s]


Epoch 0:  22%|██▏       | 2700/12500 [2:11:20<7:56:45,  2.92s/it, loss=0.00318, v_num=3, train/loss_simple_step=0.0028, train/loss_vlb_step=1.03e-5, train/loss_step=0.0028, global_step=2699.0]    Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.61it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.62it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.58it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.58it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:17,  2.59it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.59it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.55it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:16,  2.55it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.56it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.56it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.57it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.57it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.55it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  22%|██▏       | 2701/12500 [2:11:43<7:57:54,  2.93s/it, loss=0.00316, v_num=3, train/loss_simple_step=0.00186, train/loss_vlb_step=6.6e-6, train/loss_step=0.00186, global_step=2700.0]

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.54it/s]


Epoch 0:  24%|██▍       | 3000/12500 [2:25:57<7:42:11,  2.92s/it, loss=0.00506, v_num=3, train/loss_simple_step=0.00128, train/loss_vlb_step=5.22e-6, train/loss_step=0.00128, global_step=3e+3]    Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.62it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.64it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.60it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:16,  2.60it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.60it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.60it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.58it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.59it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.58it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.59it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.58it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.54it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  24%|██▍       | 3001/12500 [2:26:19<7:43:10,  2.93s/it, loss=0.00485, v_num=3, train/loss_simple_step=0.00842, train/loss_vlb_step=7.31e-5, train/loss_step=0.00842, global_step=3e+3]

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.55it/s]


Epoch 0:  26%|██▋       | 3300/12500 [2:40:33<7:27:37,  2.92s/it, loss=0.00574, v_num=3, train/loss_simple_step=0.0175, train/loss_vlb_step=0.000186, train/loss_step=0.0175, global_step=3299.0]   Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.62it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.63it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.58it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.59it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:17,  2.57it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.56it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.57it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.56it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.56it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.57it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:15,  2.53it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.55it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.53it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  26%|██▋       | 3301/12500 [2:40:56<7:28:30,  2.93s/it, loss=0.00569, v_num=3, train/loss_simple_step=0.00141, train/loss_vlb_step=5.32e-6, train/loss_step=0.00141, global_step=3300.0]

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.54it/s]


Epoch 0:  29%|██▉       | 3600/12500 [2:55:10<7:13:03,  2.92s/it, loss=0.00512, v_num=3, train/loss_simple_step=0.00167, train/loss_vlb_step=8.77e-6, train/loss_step=0.00167, global_step=3599.0]  Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.65it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.63it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:18,  2.60it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.59it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.59it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:16,  2.59it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.58it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.58it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.57it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.56it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.58it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:15,  2.53it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.54it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.55it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  29%|██▉       | 3601/12500 [2:55:32<7:13:49,  2.92s/it, loss=0.00524, v_num=3, train/loss_simple_step=0.00368, train/loss_vlb_step=1.4e-5, train/loss_step=0.00368, global_step=3600.0] 

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.55it/s]


Epoch 0:  31%|███       | 3900/12500 [3:09:47<6:58:30,  2.92s/it, loss=0.00824, v_num=3, train/loss_simple_step=0.00199, train/loss_vlb_step=6.8e-6, train/loss_step=0.00199, global_step=3899.0]   Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.66it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.63it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.56it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.56it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:17,  2.58it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.55it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.57it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.58it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.57it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.57it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.57it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.56it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.55it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  31%|███       | 3901/12500 [3:10:09<6:59:11,  2.92s/it, loss=0.00728, v_num=3, train/loss_simple_step=0.000985, train/loss_vlb_step=5.17e-6, train/loss_step=0.000985, global_step=3900.0]

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.53it/s]


Epoch 0:  34%|███▎      | 4200/12500 [3:24:24<6:43:56,  2.92s/it, loss=0.00351, v_num=3, train/loss_simple_step=0.00181, train/loss_vlb_step=6.8e-6, train/loss_step=0.00181, global_step=4199.0]   Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.62it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:17,  2.67it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.62it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.61it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.59it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:16,  2.60it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.59it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.57it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.58it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.57it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.58it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.57it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.57it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.56it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  34%|███▎      | 4201/12500 [3:24:47<6:44:33,  2.92s/it, loss=0.00339, v_num=3, train/loss_simple_step=0.00155, train/loss_vlb_step=5.58e-6, train/loss_step=0.00155, global_step=4200.0]

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.54it/s]


Epoch 0:  36%|███▌      | 4500/12500 [3:39:02<6:29:23,  2.92s/it, loss=0.00553, v_num=3, train/loss_simple_step=0.00509, train/loss_vlb_step=1.98e-5, train/loss_step=0.00509, global_step=4499.0]  Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.66it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.63it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.63it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.60it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.57it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:17,  2.57it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.55it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.57it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:16,  2.53it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.55it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.55it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.55it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.54it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.54it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  36%|███▌      | 4501/12500 [3:39:24<6:29:55,  2.92s/it, loss=0.00488, v_num=3, train/loss_simple_step=0.00129, train/loss_vlb_step=5.51e-6, train/loss_step=0.00129, global_step=4500.0]

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.54it/s]


Epoch 0:  38%|███▊      | 4800/12500 [3:53:40<6:14:50,  2.92s/it, loss=0.00437, v_num=3, train/loss_simple_step=0.00616, train/loss_vlb_step=3.16e-5, train/loss_step=0.00616, global_step=4799.0]  Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.65it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.62it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:18,  2.54it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.57it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.57it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:17,  2.55it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.54it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.54it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:16,  2.54it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.55it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.56it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.55it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.57it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.57it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  38%|███▊      | 4801/12500 [3:54:03<6:15:19,  2.93s/it, loss=0.00434, v_num=3, train/loss_simple_step=0.00248, train/loss_vlb_step=9.3e-6, train/loss_step=0.00248, global_step=4800.0] 

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.53it/s]


Epoch 0:  41%|████      | 5100/12500 [4:08:18<6:00:16,  2.92s/it, loss=0.00423, v_num=3, train/loss_simple_step=0.00327, train/loss_vlb_step=1.31e-5, train/loss_step=0.00327, global_step=5099.0]  Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.61it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.60it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:18,  2.58it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.60it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.60it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:17,  2.59it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.57it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.58it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.57it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.56it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.57it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.57it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.53it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.53it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  41%|████      | 5101/12500 [4:08:40<6:00:42,  2.93s/it, loss=0.00367, v_num=3, train/loss_simple_step=0.00297, train/loss_vlb_step=1.05e-5, train/loss_step=0.00297, global_step=5100.0]

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.53it/s]


Epoch 0:  43%|████▎     | 5400/12500 [4:22:56<5:45:43,  2.92s/it, loss=0.00419, v_num=3, train/loss_simple_step=0.0126, train/loss_vlb_step=0.000165, train/loss_step=0.0126, global_step=5399.0]   Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.64it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.63it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:17,  2.63it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.58it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.57it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:17,  2.56it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.56it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.54it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:16,  2.53it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.54it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.55it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:14,  2.56it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.51it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.54it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  43%|████▎     | 5401/12500 [4:23:19<5:46:06,  2.93s/it, loss=0.00417, v_num=3, train/loss_simple_step=0.00155, train/loss_vlb_step=6.26e-6, train/loss_step=0.00155, global_step=5400.0]

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.53it/s]


Epoch 0:  46%|████▌     | 5700/12500 [4:37:35<5:31:10,  2.92s/it, loss=0.00312, v_num=3, train/loss_simple_step=0.00626, train/loss_vlb_step=3.75e-5, train/loss_step=0.00626, global_step=5699.0]  Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps
Data shape for DDIM sampling is (2, 4, 64, 64), eta 0.0
Running DDIM Sampling with 50 timesteps



DDIM Sampler:   0%|          | 0/50 [00:00<?, ?it/s][A
DDIM Sampler:   2%|▏         | 1/50 [00:00<00:18,  2.62it/s][A
DDIM Sampler:   4%|▍         | 2/50 [00:00<00:18,  2.61it/s][A
DDIM Sampler:   6%|▌         | 3/50 [00:01<00:18,  2.58it/s][A
DDIM Sampler:   8%|▊         | 4/50 [00:01<00:17,  2.57it/s][A
DDIM Sampler:  10%|█         | 5/50 [00:01<00:17,  2.58it/s][A
DDIM Sampler:  12%|█▏        | 6/50 [00:02<00:17,  2.57it/s][A
DDIM Sampler:  14%|█▍        | 7/50 [00:02<00:16,  2.57it/s][A
DDIM Sampler:  16%|█▌        | 8/50 [00:03<00:16,  2.55it/s][A
DDIM Sampler:  18%|█▊        | 9/50 [00:03<00:15,  2.56it/s][A
DDIM Sampler:  20%|██        | 10/50 [00:03<00:15,  2.54it/s][A
DDIM Sampler:  22%|██▏       | 11/50 [00:04<00:15,  2.54it/s][A
DDIM Sampler:  24%|██▍       | 12/50 [00:04<00:15,  2.52it/s][A
DDIM Sampler:  26%|██▌       | 13/50 [00:05<00:14,  2.52it/s][A
DDIM Sampler:  28%|██▊       | 14/50 [00:05<00:14,  2.52it/s][A
DDIM Sampler:  30%|███       | 15/50 [00:0

Epoch 0:  46%|████▌     | 5701/12500 [4:37:58<5:31:30,  2.93s/it, loss=0.00314, v_num=3, train/loss_simple_step=0.00237, train/loss_vlb_step=1.03e-5, train/loss_step=0.00237, global_step=5700.0]

DDIM Sampler: 100%|██████████| 50/50 [00:19<00:00,  2.52it/s]


Epoch 0:  46%|████▋     | 5800/12500 [4:42:44<5:26:36,  2.92s/it, loss=0.00462, v_num=3, train/loss_simple_step=0.00585, train/loss_vlb_step=2.37e-5, train/loss_step=0.00585, global_step=5799.0]  

: 