# Training of DiffInstruct
We need a notebook that runs on Google Colab, because loading two diffusion models, a GAN, and the data into VRAM is too much for an 8 GB laptop GPU. Who would have thought.

In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from DiffInstruct import train_diff_instruct
from DiffInstruct import w

Parameters:

In [None]:
# --- Optimisation settings -------------------------------------------------
# batch_size feeds the DataLoader; the remaining values are forwarded to
# train_diff_instruct as-is.
batch_size = 128
lr_phi = lr_theta = 1e-4  # same learning rate for both phi and theta
epochs = 50
patience = 10  # presumably an early-stopping patience -- confirm in DiffInstruct

# --- Model / diffusion sizes -----------------------------------------------
timesteps = 2000  # length of the beta schedule built further down
latent_dim = 128

# --- Compute device --------------------------------------------------------
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# --- Checkpoint locations --------------------------------------------------
# Inputs (presumably pre-trained weights -- confirm against DiffInstruct):
dm_path = "DM_models/UNet_4layers_128hc_2000steps.pth"
generator_path = "GAN_models/best_generator.pth"
# Outputs passed to train_diff_instruct as save_path_phi / save_path_theta:
save_path_phi = "DI_models/DI_phi.pth"
save_path_theta = "DI_models/DI_generator.pth"

Load the data:

In [None]:
# Preprocessing pipeline: convert each image to a tensor, then normalise its
# single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# FashionMNIST training split, downloaded into ./data when not already there.
dataset = datasets.FashionMNIST(
    root="./data",
    train=True,
    download=True,
    transform=transform,
)

# Reshuffled mini-batches of `batch_size` samples.
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

Set training args:

In [None]:
# Linear beta schedule: `timesteps` values evenly spaced from 1e-4 to 0.02,
# moved to the training device so every derived tensor lives there too.
betas = torch.linspace(1e-4, 0.02, timesteps).to(device)
alphas = 1.0 - betas

# Running product of the alphas, computed once and reused for both terms
# below (it was previously recomputed for each of them).
alphas_cumprod = torch.cumprod(alphas, dim=0)

# Square roots of the cumulative product and of its complement; both are
# handed to train_diff_instruct. Presumably the signal / noise coefficients of
# the forward diffusion process -- confirm against DiffInstruct.
# No extra .to(device) is needed: they inherit the device of `betas`.
sqrt_alphas_cumprod = torch.sqrt(alphas_cumprod)
sqrt_one_minus_alphas_cumprod = torch.sqrt(1.0 - alphas_cumprod)

Now we train:

In [None]:
# Keyword arguments are grouped by role and then unpacked into a single call;
# the set of arguments and their values is unchanged.

# Where to read the input checkpoints and where to write the results.
checkpoint_args = dict(
    dm_path=dm_path,
    generator_path=generator_path,
    save_path_phi=save_path_phi,
    save_path_theta=save_path_theta,
)

# Noise-schedule tensors plus `w` (imported from DiffInstruct; presumably the
# weighting function -- confirm there).
diffusion_args = dict(
    betas=betas,
    sqrt_alphas_cumprod=sqrt_alphas_cumprod,
    sqrt_one_minus_alphas_cumprod=sqrt_one_minus_alphas_cumprod,
    w=w,
    timesteps=timesteps,
    latent_dim=latent_dim,
)

# Optimisation settings.
optim_args = dict(
    lr_phi=lr_phi,
    lr_theta=lr_theta,
    patience=patience,
    epochs=epochs,
)

train_diff_instruct(
    dataloader=dataloader,
    device=device,
    **checkpoint_args,
    **diffusion_args,
    **optim_args,
)