## Summary

### Forward process and training loop:

    # Noise-prediction training: build a linear noise schedule, noise each
    # image at a randomly drawn time step, and train the model to predict
    # the noise that was added.

    n_steps = 512  # Number of time steps; this is called T in the formulas

    # Linear noise schedule: one beta value per time step.
    # NOTE(review): `start` and `end` (the schedule endpoints) are not
    # defined in this snippet -- presumably set elsewhere; confirm.
    beta = linspace(start, end, n_steps)

    # Precompute some quantities for the reparametrization
    alpha = 1. - beta
    # Running product of alpha along the time-step axis
    alpha_bar = cumprod(alpha, axis=0)

    # Per-step coefficients that scale the clean image and the noise,
    # respectively, when the noisy image is built below
    sqrt_alpha_bar = sqrt(alpha_bar)
    sqrt_one_minus_alpha_bar = sqrt(1. - alpha_bar)

    model = UNet()
    optimizer = Adam(model.parameters(), lr=0.001)

    for batch, _ in dataloader:
        ...  # move batch to GPU, reset optimizers, then:

        bs = batch.shape[0]
        # Random time steps, one for each image in the mini-batch.
        # The upper bound is n_steps ("T" is only the name used in the
        # formulas and is not a variable here); it is exclusive, so every
        # t is a valid index into the precomputed schedule tensors.
        # t is created on the same device as the batch because it is also
        # fed to the model; randint already returns int64 indices.
        t = torch.randint(0, n_steps, (bs,), device=device)

        # Generate noise and add it to the images (forward pass).
        # The per-sample coefficients are reshaped to (bs, 1, 1, 1) so
        # that they broadcast over the remaining dimensions of the batch.
        noise = torch.randn_like(batch, device=device)
        x_noisy = (
            sqrt_alpha_bar[t].view(bs, 1, 1, 1) * batch +
            sqrt_one_minus_alpha_bar[t].view(bs, 1, 1, 1) * noise
        )

        # Predict the noise using the model
        # NOTE how the model receives in input the noisy
        # image AND the time step t
        noise_pred = model(x_noisy, t)

        # Approximate taking the expectation by averaging
        # the MSE loss over the minibatch and the random
        # time steps. Arguments follow the (input, target) order of
        # F.mse_loss: the prediction is the input, the true noise the target.
        loss = F.mse_loss(noise_pred, noise)

        # Compute gradients and optimize the model weights
        loss.backward()
        optimizer.step()


## Additional References

[]()