In [1]:
import os
import math

import torch
import torch.nn.functional as F
from tqdm import tqdm
from PIL import Image, ImageDraw

from scorefield.models.ddpm.denoising_diffusion import Unet
from scorefield.models.ddpm.denoising_diffusion_1d import Unet1D, GaussianDiffusion1D, Dataset1D, Trainer1D
from scorefield.models.ddpm.gaussian_diffusion import Diffusion
# from scorefield.utils.rendering import Maze2dRenderer
from scorefield.utils.rl_utils import load_config
from scorefield.utils.utils import log_num_check, imshow, gen_goals, random_batch, eval_batch, prepare_input, search_limit
from scorefield.utils.diffusion_utils import bilinear_interpolate


In [2]:
# --- Run configuration -------------------------------------------------------
# Everything below is driven by the YAML file loaded here.
config_dir = "./scorefield/configs/diffusion.yaml"
args = load_config(config_dir)

# Device entry from the config; reused for every module and tensor created later.
device = args['device']

# Checkpoint location: the configured log directory joined with the configured
# model file name.
log_root, checkpoint_name = args['log_path'], args['model_path']
model_path = os.path.join(log_root, checkpoint_name)

# Map image, resolved relative to the current working directory.
map_img = Image.open("map.png")

In [3]:
class Unet2D(Unet):
    """U-Net whose output map is queried at continuous positions.

    The parent ``Unet`` forward pass is evaluated on the observation and the
    timestep, and the resulting map is then sampled at the positions ``x``
    with ``bilinear_interpolate``.
    """

    def __init__(self, dim, out_dim, dim_mults=(1, 2, 4, 8)):
        """Pass ``dim``, ``out_dim`` and ``dim_mults`` through to ``Unet`` unchanged."""
        super().__init__(dim=dim, out_dim=out_dim, dim_mults=dim_mults)

    def forward(self, obs, x, t):
        """Return the parent network's output for ``(obs, t)`` sampled at ``x``.

        The original author annotates the result as shape (B, 2); that depends
        on ``out_dim`` and on ``bilinear_interpolate`` -- TODO confirm.
        """
        return bilinear_interpolate(super().forward(obs, t), x)

# Hyper-parameters read from the loaded config.
img_size, noise_steps = args['image_size'], args['noise_steps']
train_lr, beta = args['train_lr'], args['beta']

# Score network with a 2-channel output, placed on the configured device.
model = Unet2D(dim=img_size, out_dim=2, dim_mults=(1, 2, 4, 8)).to(device)

# Diffusion process over 2-D states. `beta_start` and `beta_end` are both set
# to the same configured `beta` (presumably a constant noise schedule --
# confirm against the Diffusion implementation).
diffusion = Diffusion(
    input_size=(2,),
    noise_steps=noise_steps,
    beta_start=beta,
    beta_end=beta,
    device=device,
)

# Adam over all network parameters with the configured learning rate.
optim = torch.optim.Adam(model.parameters(), lr=train_lr)

In [42]:
# One-sample sanity check: print the regression target next to the network's
# prediction for the same noised state.

# Clean state drawn uniformly from [-0.1, 0.1) in each of the two coordinates.
x0 = 0.1 * (2 * torch.rand(1, 2, device=device, dtype=torch.float32) - 1.)
obs = prepare_input(args, map_img, goal_pos=x0, circle_rad=2)

# Random timestep and the corresponding noised state.
t = diffusion.sample_timesteps(1).to(device)
x_t = diffusion.noise_state(x0, t).to(device)

# NOTE(review): the target divides by the scalar `beta` whatever `t` was drawn;
# whether that matches the noise level `noise_state` applies at `t` cannot be
# checked from this notebook -- confirm.
target = torch.neg(x_t - x0) / beta
predict = model(obs, x_t, t)

print(target, predict, sep="\n")

tensor([[-2070.7644, -1064.1831]], device='cuda:0')
tensor([[373.7335, -74.9232]], device='cuda:0', grad_fn=<SqueezeBackward1>)


In [5]:
# Training loop: regress the network output onto the target score of freshly
# sampled, noised states.
epochs = args['epochs']
batch_size = args['batch_size']

# Upper bound on the global gradient norm of a single optimisation step.
# The recorded run of this cell shows the loss jumping from ~0.09 at iter 0 to
# ~1.5e11 at iter 100; clipping bounds the size of any one update.
# 1.0 is a conventional starting value, not a tuned one.
max_grad_norm = 1.0

# Training mode does not change inside the loop, so it is set once up front.
model.train()

for iters in range(epochs):
    # Clean states drawn uniformly from [-0.1, 0.1) per coordinate.
    x0 = (torch.rand(batch_size, 2, device=device, dtype=torch.float32)*2 -1.) *0.1
    obs = prepare_input(args, map_img, goal_pos=x0, circle_rad=2)
    t = diffusion.sample_timesteps(batch_size).to(device)

    x_t = diffusion.noise_state(x0, t).to(device)

    # NOTE(review): the target divides by the scalar `beta` for every sampled
    # `t`; whether that matches the noise level `noise_state` applies at `t`
    # cannot be checked from this notebook -- confirm.
    target_score = (-(x_t - x0) / beta)
    target_score = target_score.view(target_score.shape[0], -1)
    predicted_score = model(obs, x_t, t)
    predicted_score = predicted_score.view(predicted_score.shape[0],-1)

    # Half the squared error, summed over the last dimension and averaged over
    # the batch.
    loss = 1 / 2. * ((predicted_score - target_score)**2).sum(dim=-1).mean(dim=0)

    # Fail fast instead of stepping the optimiser on a NaN/inf loss, which
    # would corrupt the weights and Adam's running statistics.
    loss_value = loss.item()
    if not math.isfinite(loss_value):
        raise FloatingPointError(f"non-finite loss at iter {iters}: {loss_value}")

    optim.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
    optim.step()

    if iters % 100 == 0:
        print(f"iter {iters}: {loss_value}")
    
    

iter 0: 0.08566910773515701
iter 100: 152559648768.0
iter 200: 42109848.0
iter 300: 29346332.0
iter 400: 16348562.0
iter 500: 16975400.0
iter 600: 6292559.5


KeyboardInterrupt: 

In [None]:
# Persist the trained weights. The parent directory is created first because
# torch.save does not create missing directories and would otherwise raise
# FileNotFoundError on a fresh checkout.
# NOTE(review): `model_path` is built from the config in an earlier cell but is
# not used here -- confirm whether this hard-coded path is intentional.
checkpoint_file = "./logs/pretrained/denoising.pt"
os.makedirs(os.path.dirname(checkpoint_file), exist_ok=True)
torch.save(model.state_dict(), checkpoint_file)

In [6]:
# Evaluation: roll out Langevin-style updates from the configured initial state
# using the network's score prediction.
model = model.eval()

# NOTE(review): the recorded run of this cell stopped with
# KeyError: 'eval_batch_size' -- the key has to be added to the config file.
eval_batch_size = args['eval_batch_size']
init_state = args['init_state']

# NOTE(review): `renderer` is not defined in any visible cell (the
# Maze2dRenderer import at the top of the notebook is commented out); it must
# be created before this cell can run.
obs = eval_batch(renderer, map_img, init_state, eval_batch_size, device=device)
obs = torch.tensor(obs, dtype=torch.float32).to(device)
# Explicit float dtype: an integer-valued `init_state` would otherwise produce
# an integer tensor, which torch.randn_like below cannot handle.
x = torch.tensor(init_state, dtype=torch.float32, device=device)

dt = 0.01
trajectory = [x]

# Nothing in the rollout needs gradients, so the whole loop runs under no_grad.
with torch.no_grad():
    for step in tqdm(range(noise_steps)):
        # The training loop feeds the model a tensor of timesteps (one per
        # sample), so the same form is used here instead of a bare Python int.
        # Assumes one entry per batch element and an integer dtype, as
        # `diffusion.sample_timesteps` presumably returns -- TODO confirm.
        t = torch.full(
            (eval_batch_size,), noise_steps - step, device=device, dtype=torch.long
        )
        score = model(obs, x, t)
        z_t = torch.randn_like(x)

        # Update: half-step along the score plus Gaussian noise scaled by sqrt(dt).
        x = x + score * dt / 2. + math.sqrt(dt) * z_t
        trajectory.append(x)

# torch.tensor() cannot build a tensor from a list of multi-element tensors;
# torch.stack joins them along a new leading (time) dimension.
# Assumes every entry, including the initial state, has the same shape.
trajectory = torch.stack(trajectory)

KeyError: 'eval_batch_size'