In [None]:
import torch
import matplotlib.pyplot as plt

# Offset of the component centres: they sit at (+mu, +mu) and (-mu, -mu)
mu = 10

# How many points are drawn from each component
n_samples = 500

# Component centres in 2D, one row per component
means = torch.tensor([[mu] * 2, [-mu] * 2], dtype=torch.float32)

# Both components share the 2x2 identity covariance
cov = torch.eye(2)
# Draw samples from the multivariate normal N(mean, cov)
def sample_gaussian(mean, cov, n):
    """Return ``n`` samples from a Gaussian with the given mean and covariance.

    Args:
        mean: Tensor of shape ``(d,)`` (or anything broadcastable against
            ``(n, d)``).
        cov: Symmetric positive semi-definite ``(d, d)`` covariance matrix.
        n: Number of samples to draw.

    Returns:
        Tensor of shape ``(n, d)`` with one sample per row.
    """
    if not torch.is_floating_point(cov):
        cov = cov.to(torch.get_default_dtype())
    # Symmetric matrix square root via the eigendecomposition
    # cov = V diag(w) V^T. An element-wise ``cov.sqrt()`` only equals the
    # matrix root when cov is diagonal, so it is wrong for correlated data.
    eigvals, eigvecs = torch.linalg.eigh(cov)
    # Clamp tiny negative eigenvalues produced by round-off before the sqrt
    root = (eigvecs * eigvals.clamp_min(0).sqrt()) @ eigvecs.T
    z = torch.randn(n, cov.shape[-1], dtype=cov.dtype, device=cov.device)
    # root is symmetric, hence Cov[z @ root] = root @ root = cov
    return z @ root + mean

# Draw n_samples points from each mixture component
samples_1 = sample_gaussian(means[0], cov, n_samples)
samples_2 = sample_gaussian(means[1], cov, n_samples)

# Pool both components into a single dataset (rows of component 1 first)
mixture_samples = torch.cat([samples_1, samples_2], dim=0)

# Scatter plot: each component in its own colour, plus the pooled set in gray
plt.figure(figsize=(6, 6))
plt.scatter(samples_1[:, 0], samples_1[:, 1], alpha=0.6, label='Component 1')
plt.scatter(samples_2[:, 0], samples_2[:, 1], alpha=0.6, label='Component 2')
plt.scatter(mixture_samples[:, 0], mixture_samples[:, 1], s=5, color='gray', alpha=0.2, label='Mixture (all)')
plt.xlabel('x')
plt.ylabel('y')
# Raw string: '\m' is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning. The backslash must reach matplotlib
# unchanged so that '$\mu$' is rendered as mathtext.
plt.title(r'Samples from 2D Gaussian Mixture ($\mu={}$, $-\mu={}$)'.format(mu, -mu))
plt.legend()
plt.axis('equal')  # same scale on both axes so the clusters are not distorted
plt.grid(True)
plt.show()




In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import trange

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Training set (moved to the chosen device) and global settings
data = mixture_samples.to(device)
n, d = data.shape  # number of training points, dimensionality of each point
T = 100  # number of diffusion timesteps

# Two-layer network with a single hidden unit: d -> 1 -> tanh -> d
class TwoLayerDiffusionModel(nn.Module):
    """Noise predictor made of two linear layers around one tanh unit.

    The input is projected to a scalar, squashed with tanh, and projected
    back to ``d`` dimensions. The forward pass takes only ``x``; no timestep
    is passed to the network.
    """

    def __init__(self, d):
        """Build the layers for ``d``-dimensional inputs and outputs."""
        super().__init__()
        # Layers are created in the order linear1, tanh, linear2
        self.linear1 = nn.Linear(in_features=d, out_features=1)
        self.tanh = nn.Tanh()
        self.linear2 = nn.Linear(in_features=1, out_features=d)

    def forward(self, x):
        """Map ``x`` of shape ``(..., d)`` to an output of the same shape."""
        hidden = self.tanh(self.linear1(x))
        return self.linear2(hidden)

# Training hyperparameters
epochs = 1000  # number of optimisation steps; adjust as necessary
batch_size = 256

# Instantiate the network on the selected device
model = TwoLayerDiffusionModel(d).to(device)

# DDPM-style noise schedule: betas grow linearly from beta_start to beta_end
beta_start, beta_end = 1e-4, 0.02
betas = torch.linspace(beta_start, beta_end, T).to(device)
alphas = 1.0 - betas
alpha_bars = alphas.cumprod(dim=0)  # running product of the alphas
# NOTE(review): with T = 100 the betas sum to roughly 1, so alpha_bars[-1] is
# about 0.36 and x_T still carries signal from x_0 -- confirm that starting
# the sampler from pure N(0, I) noise is intended.

# Adam over all model parameters (the loss itself is the noise-prediction MSE)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

def get_batch(source=None, size=None):
    """Return ``size`` rows drawn uniformly with replacement from ``source``.

    Args:
        source: Tensor whose first dimension indexes the examples. Defaults
            to the module-level ``data``.
        size: Number of rows to draw. Defaults to the module-level
            ``batch_size``.

    Returns:
        Tensor of shape ``(size, *source.shape[1:])`` on ``source``'s device.
    """
    if source is None:
        source = data
    if size is None:
        size = batch_size
    # Draw the indices on the same device as the data so that indexing does
    # not mix a CPU index tensor with a tensor that lives on the GPU.
    idx = torch.randint(0, source.shape[0], (size,), device=source.device)
    return source[idx]

for epoch in trange(epochs):
    # Clear gradients left over from the previous step
    optimizer.zero_grad()

    # Sample, in this order: a mini-batch, one timestep per example, the noise
    x0 = get_batch().to(device)  # [batch_size, d]
    t = torch.randint(0, T, (batch_size,), device=device).long()
    noise = torch.randn_like(x0)

    # Forward diffusion in closed form:
    # x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise
    alpha_bar_t = alpha_bars[t].view(-1, 1)  # column vector, broadcasts over d
    xt = alpha_bar_t.sqrt() * x0 + (1 - alpha_bar_t).sqrt() * noise

    # The network sees only x_t and is trained to recover the noise (MSE)
    pred_noise = model(xt)
    loss = (pred_noise - noise).pow(2).mean()

    loss.backward()
    optimizer.step()

    # Report every 100 steps and once more on the final step
    if epoch == epochs - 1 or epoch % 100 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f}")

# Ancestral (DDPM) sampler driven by the trained noise predictor
@torch.no_grad()
def sample_ddpm(model, n_samples, d, T):
    """Generate ``n_samples`` points by running the reverse diffusion chain.

    Reads the module-level ``betas``, ``alphas``, ``alpha_bars`` and
    ``device``; the schedules are indexed at steps ``T - 1`` down to ``0``.

    Args:
        model: Callable mapping a ``(n_samples, d)`` tensor to predicted noise
            of the same shape.
        n_samples: Number of points to generate.
        d: Dimensionality of each point.
        T: Number of reverse steps to run.

    Returns:
        CPU tensor of shape ``(n_samples, d)``.
    """
    # Start the chain from standard normal noise
    sample = torch.randn(n_samples, d).to(device)
    for step in range(T - 1, -1, -1):
        eps_hat = model(sample)  # predicted noise for the current state
        inv_sqrt_alpha = 1 / alphas[step].sqrt()
        eps_weight = (1 - alphas[step]) / (1 - alpha_bars[step]).sqrt()
        # Mean of the reverse transition given the predicted noise
        sample = inv_sqrt_alpha * (sample - eps_weight * eps_hat)
        if step > 0:
            # Inject fresh noise with variance beta_t on every step but the last
            sample = sample + betas[step].sqrt() * torch.randn_like(sample)
    return sample.cpu()

# Draw 1000 points from the trained model
gen_samples = sample_ddpm(model, 1000, d, T)

# Overlay the generated points (red) on the training mixture (gray)
plt.figure(figsize=(6, 6))
plt.scatter(
    gen_samples[:, 0],
    gen_samples[:, 1],
    s=8,
    color='red',
    alpha=0.5,
    label='Generated',
)
plt.scatter(
    data[:, 0].cpu(),  # training data may live on the GPU; plot from the CPU
    data[:, 1].cpu(),
    s=8,
    color='gray',
    alpha=0.15,
    label='Training (Mixture)',
)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Generated samples from trained diffusion model')
plt.legend()
plt.axis('equal')  # same scale on both axes
plt.grid(True)
plt.show()

