In [1]:
from dataclasses import dataclass
from typing import Optional

import torch

from dataloader.celeba_pbr import get_dataloader
from models.sd_models import ShadingDiffusion

@dataclass
class Configs:
    """Experiment, training, evaluation, path and Hub settings for one run.

    Every setting carries a type annotation so that it is a real dataclass
    field: it can be overridden through the constructor
    (e.g. ``Configs(train_mode="ddpm")``) and it shows up in ``repr``,
    equality comparison and ``dataclasses.asdict``. An un-annotated assignment
    inside a ``@dataclass`` body is only a class attribute and gets none of
    that.

    The first four fields (``exp_name``, ``image_size``, ``random_seed``,
    ``train_batch_size``) keep their original relative order so positional
    construction stays compatible.
    """

    # Experiment settings
    exp_name: str = "exp_01"
    image_size: tuple = (256, 256)  # the generated image resolution
    random_seed: int = 0

    # Training settings
    train_batch_size: int = 10
    train_mode: str = "vae"  # `ddpm` for DDPM, `vae` for VAE
    learning_rate: float = 1e-4
    lr_warmup_steps: int = 500
    num_epochs: int = 50
    save_image_epochs: int = 10
    save_model_epochs: int = 30
    mixed_precision: str = "fp16"  # `no` for float32, `fp16` for automatic mixed precision
    #gradient_accumulation_steps = 1

    # Evaluation settings
    eval_batch_size: int = 10  # how many images to sample during evaluation

    # Path settings
    data_dir: str = "dataset/celeba-pbr"
    # Despite the `_dir` suffix this points at a JSON file.
    fov_file_dir: str = "dataset/celeba-pbr/pred_fov.json"
    # NOTE(review): the value looks carried over from another project -- confirm
    # it is the intended output location for this experiment.
    output_dir: str = "ddpm-butterflies-128"  # the model name locally and on the HF Hub
    #overwrite_output_dir = True  # overwrite the old model when re-running the notebook

    # Hugging Face Hub settings
    push_to_hub: bool = False  # whether to upload the saved model to the HF Hub
    hub_model_id: str = "<your-username>/<my-awesome-model>"  # the name of the repository to create on the HF Hub
    # Presumably a private/public flag for the Hub repo; None leaves it unset -- TODO confirm.
    hub_private_repo: Optional[bool] = None

# Build the experiment configuration with its default values.
configs = Configs()

# Instantiate the model from the configuration and move it to the GPU.
model = ShadingDiffusion(configs).cuda()

# Create the loaders and pull a single training batch to probe the model with.
train_loader, eval_loader = get_dataloader(configs)
train_iter = iter(train_loader)
a = next(train_iter)

# Move both ground-truth maps to the GPU, then join them along dim 1
# (presumably the channel axis -- TODO confirm against the dataloader).
pos, normal = (a[key].cuda() for key in ('pos_in_cam_gt', 'normal_gt'))
input = torch.cat((pos, normal), dim=1)  # NOTE: this name shadows the built-in `input`

In [3]:
# Encode the concatenated position/normal batch with the model's VAE; the
# result exposes a `latent_dist` that the next cell samples from.
# NOTE(review): autograd is active here (the recorded min/max output carries a
# grad_fn) -- wrap in torch.no_grad() if this is for inspection only.
latents = model.vae.encode(input)

In [10]:
# Draw ONE sample from the latent distribution and report its value range.
# Calling .sample() separately for the min and for the max would take the two
# statistics from different random draws, so they would not describe the same
# tensor.
latent_sample = latents.latent_dist.sample()
latent_sample.min(), latent_sample.max()

(tensor(-36.6083, device='cuda:0', grad_fn=<MinBackward1>),
 tensor(104.4023, device='cuda:0', grad_fn=<MaxBackward1>))

In [None]:
import torchvision.transforms.functional as tvf

# `permute(2, 0, 1)` only works on a 3-D tensor, but `pos` is concatenated on
# dim=1 and fed to the VAE above, so it is presumably a 4-D batch
# (batch, channels, H, W) -- TODO confirm. In that case show the first sample,
# which is already channel-first; otherwise keep the original channel-last ->
# channel-first conversion.
pos_image = pos[0] if pos.ndim == 4 else pos.permute(2, 0, 1)

# Scale by the maximum so the largest value maps to 1.
# NOTE(review): negative coordinates are not handled by this scaling.
tvf.to_pil_image(pos_image / pos_image.max())

In [None]:
from diffusers import AutoPipelineForText2Image
import torch

# Load the half-precision ("fp16" variant) SDXL-Turbo text-to-image pipeline
# and place it on the GPU in a single expression.
pipe = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/sdxl-turbo",
    torch_dtype=torch.float16,
    variant="fp16",
).to("cuda")

# Text prompt shared by the generation cells below.
prompt = "A cinematic shot of a baby racoon wearing an intricate italian priest robe."

In [None]:
# Generate with a single denoising step and guidance disabled
# (guidance_scale=0.0), then display the list of resulting images.
pipe(
    prompt=prompt,
    num_inference_steps=1,
    guidance_scale=0.0,
).images

In [None]:
# Run the pipeline again with the same settings (one step, no guidance) and
# keep only the first generated image.
image = pipe(
    prompt=prompt,
    num_inference_steps=1,
    guidance_scale=0.0,
).images[0]

In [None]:
import numpy as np

# Convert the generated image (first element of the pipeline's `.images`)
# into a NumPy array for numeric inspection.
i = np.array(image)

In [None]:
# Divide by 255; presumably this maps 8-bit pixel values into [0, 1] -- TODO
# confirm the array dtype.
# NOTE(review): this displays the whole array; consider showing a slice or
# summary statistics instead.
i / 255.