# Training the Model

In this notebook we show how to train the diffusion model.

Before running the notebook make sure to upload the ```dataset.zip``` file and the ```denoising_diffusion_pytorch.py``` file (can be found [here](https://github.com/Lilac-code/music-diffusion/tree/main)). Also, if there is a ```checkpoint.pth```, upload that as well.

If using Kaggle, then upload these as datasets named 'dataset', 'unetfile' and 'checkpoint' respectively.

Firstly we unzip the ```dataset.zip``` file (that contains a directory named dataset containing the piano roll segments), and install all the dependencies.

If using Kaggle, then unzipping the file is not necessary.

In [None]:
!unzip dataset.zip

In [None]:
!pip install ema_pytorch
!pip install einops
!pip install accelerate
#!pip install GPUtil #for 2 GPUs

In [None]:
from PIL import Image
import os
from torchvision import transforms as T, utils
import torch
from torch import nn
#from GPUtil import showUtilization as gpu_usage #for 2 GPUs
#from torch.nn.parallel import DataParallel
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
import numpy as np
from numba import cuda

# Pipeline applied to every loaded image: a single torchvision ToTensor step
# that turns the PIL image into a torch tensor.
transform = T.Compose([T.ToTensor()])

We then prepare the dataset. To do that we transform the images to tensors, and then create a dataloader with those tensors. This dataloader can also add noise on the fly to generate the desired samples.

In [None]:
# Load every piano-roll image of the dataset directory as a tensor.
dataset_dir = '/kaggle/input/dataset/dataset'   # or just 'dataset'

segments = []
# os.listdir returns entries in arbitrary order; sorting keeps the segment
# order identical from run to run.
for img in sorted(os.listdir(dataset_dir)):
  # The context manager closes the image file as soon as it has been converted.
  with Image.open(os.path.join(dataset_dir, img)) as image:
    segments.append(transform(image))

In [None]:
def calc_ratio(rolls=None):
  """Return the average pixel value of the piano-roll segments.

  The sum of all pixel values is divided by the number of segments and by
  the lengths of the second and third dimensions of the first segment.
  For binary rolls this is the fraction of pixels equal to 1.

  Args:
    rolls: list of segment tensors. Defaults to the module-level
      ``segments`` list when omitted. Assumes every segment has the same
      height and width as the first one and a single channel -- TODO confirm
      against the dataset.

  Returns:
    The average pixel value as a Python float.

  Raises:
    ValueError: if there are no segments to average over.
  """
  if rolls is None:
    rolls = segments
  if len(rolls) == 0:
    raise ValueError("calc_ratio needs at least one segment")
  height = len(rolls[0][0])
  width = len(rolls[0][0][0])
  total = sum(torch.sum(roll).item() for roll in rolls)
  return total / len(rolls) / height / width

In [None]:
# class that on the fly creates a batch from the dataset
class Pianoroll(Dataset):
    """Dataset of (noisy roll, clean roll, noise step) triples built on the fly.

    Every roll is paired with each of the ``num_steps`` noise levels, so the
    dataset has ``len(rolls) * num_steps`` items. For step ``t`` the noise
    level is ``beta = (t + 1) / num_steps`` and each pixel of the noisy roll
    is drawn from a Bernoulli distribution with probability
    ``roll * (1 - beta) + ratio * beta``.

    Args:
        rolls: indexable collection of piano-roll segments (array-likes).
            Pixel values are assumed to lie in [0, 1], because they are used
            as probabilities -- TODO confirm against the dataset.
        ratio: probability of a 1 at full noise (``beta == 1``).
        num_steps: number of noise levels per roll (default 100).
    """

    def __init__(self, rolls, ratio, num_steps=100):
        # Zero-argument super() runs the base-class initialiser;
        # super(Pianoroll) without an instance does not.
        super().__init__()
        self.rolls = rolls
        self.num_steps = num_steps
        self.ratio = ratio

    def __len__(self):
        """Number of (roll, noise step) pairs."""
        return len(self.rolls) * self.num_steps

    def __getitem__(self, index):
        """Return ``(noisy, clean, t)`` for the flat index ``index``.

        ``noisy`` and ``clean`` are float32 numpy arrays; ``t`` is the integer
        noise step in ``[0, num_steps)``.
        """
        roll = self.rolls[index // self.num_steps]
        t = index % self.num_steps
        beta = (t + 1) / self.num_steps
        # Interpolate between the clean roll and the constant ratio, then
        # sample every pixel independently.
        prob = np.asarray(roll) * (1 - beta) + self.ratio * beta
        noisy = np.random.binomial(1, prob)
        return np.array(noisy, dtype=np.float32), np.array(roll, dtype=np.float32), t

# create dataloader_train, which feeds the network shuffled batches of training data
# one pass over it visits every segment once per noise step (100 steps per segment);
# the number of epochs is decided by the training loop, not here
# segments is the list of piano-roll segments to train the network on
# ratio is the average pixel value of the segments, i.e. for binary rolls the
# ratio of 1s to the size of the piano-roll segment (#rows by #columns)
ratio=calc_ratio()
pr = Pianoroll(rolls=segments, ratio=ratio)
batch_size = 20
dataloader_train = DataLoader(pr, batch_size=batch_size, shuffle=True)

In order to import the UNet class (and the checkpoint if it exists) in Kaggle we have to copy the file into the working directory.

In [None]:
from shutil import copyfile

# copy file into the working directory (make sure it has .py suffix)
# The Kaggle input path only exists on Kaggle; elsewhere the file is expected to
# have been uploaded next to the notebook, so the copy is skipped instead of
# failing with FileNotFoundError.
unet_src = "/kaggle/input/unetfile/denoising_diffusion_pytorch.py"
if os.path.exists(unet_src):
    copyfile(src = unet_src, dst = "/kaggle/working/denoising_diffusion_pytorch.py")
else:
    print("Kaggle input not found; expecting denoising_diffusion_pytorch.py in the current directory.")

# copyfile(src = "/kaggle/input/checkpoint/checkpoint.pth", dst = "/kaggle/working/checkpoint.pth") #if there is a checkpoint

Making sure that the dataloader has the right length: it must equal the number of segments multiplied by 100 and divided by the batch size, rounded up.

In [None]:
print('length of dataloader:',len(dataloader_train))
# Check the length instead of only printing it: with the DataLoader's default
# drop_last=False the number of batches is ceil(len(pr) / batch_size).
expected_batches = -(-len(pr) // batch_size)
assert len(dataloader_train) == expected_batches, (
    f"expected {expected_batches} batches, got {len(dataloader_train)}"
)

Code for 2 GPUs (Kaggle)

In [None]:
# the unet implementation from https://github.com/lucidrains/denoising-diffusion-pytorch
from denoising_diffusion_pytorch import Unet

epochs = 50
accumulation_steps = 10  # number of batches whose gradients are summed per optimizer step

unet = Unet(dim=48, channels=1, resnet_block_groups=3, dim_mults=(1, 2, 4, 4))
device_ids = [0, 1]  # Specify the GPU device IDs
# DataParallel is reached through the already-imported `nn`; the bare name is
# not in scope because its import is commented out at the top of the notebook.
unet = nn.DataParallel(unet, device_ids=device_ids)
unet = unet.cuda(device_ids[0])
params = list(unet.parameters())
optimizer = Adam(params, lr=5e-5)

# Load checkpoint if it exists
checkpoint_path = '/kaggle/working/checkpoint.pth'
if os.path.exists(checkpoint_path):
    # Load on the CPU; load_state_dict copies the values onto the model's device.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    # Accept checkpoints saved from a DataParallel wrapper ('module.' prefix)
    # as well as from the plain model, so either training cell can resume them.
    model_state = {
        (key[len('module.'):] if key.startswith('module.') else key): value
        for key, value in checkpoint['model_state_dict'].items()
    }
    unet.module.load_state_dict(model_state)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch'] + 1
    print(f"Resuming training from epoch {start_epoch}")
else:
    start_epoch = 0

optimizer.zero_grad()
loss_function = nn.L1Loss()

# Training loop
for epoch in range(start_epoch, epochs):
    print(f"Epoch {epoch + 1}/{epochs}")

    pending = 0  # batches back-propagated since the last optimizer step
    for step, batch in enumerate(dataloader_train):
        # batch = (noisy roll, clean roll, noise step). DataParallel itself
        # splits each batch across device_ids and gathers the output on
        # device_ids[0], so batches do not have to be paired by hand.
        batch_roll = batch[0].cuda(device_ids[0])
        batch_time = batch[2].cuda(device_ids[0])
        batch_target = batch[1].float().cuda(device_ids[0])

        predicted_x0 = unet(batch_roll, batch_time)

        loss = loss_function(predicted_x0.float(), batch_target)
        loss.backward()
        pending += 1

        if pending == accumulation_steps:
            optimizer.step()
            optimizer.zero_grad()
            pending = 0

    # Apply the gradients of a trailing partial window so they neither leak
    # into the next epoch nor get lost when resuming from the checkpoint.
    if pending:
        optimizer.step()
        optimizer.zero_grad()

    # Log the loss (of the last batch) at the end of each epoch
    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")
    # Save a checkpoint at the end of each epoch. The wrapped module's state
    # dict is stored so the keys carry no 'module.' prefix.
    checkpoint = {
            'epoch': epoch,
            'model_state_dict': unet.module.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss.item(),
    }
    torch.save(checkpoint, checkpoint_path)

print("Training complete.")

Code for 1 GPU

In [None]:
# Imported here as well so this cell runs without executing the 2-GPU cell.
from denoising_diffusion_pytorch import Unet

# files.download is a Google Colab helper; outside Colab the download is skipped.
try:
    from google.colab import files
except ImportError:
    files = None

epochs = 50

# Build the model and optimizer first, so they exist whether or not a
# checkpoint is restored below.
unet = Unet(dim=48, channels=1, resnet_block_groups=3, dim_mults=(1, 2, 4, 4))
unet.to('cuda')
params = list(unet.parameters())
optimizer = Adam(params, lr=5e-5)

# Load checkpoint if it exists
checkpoint_path = './checkpoint.pth'
if os.path.exists(checkpoint_path):
    # Load on the CPU; load_state_dict copies the values onto the model's device.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    # Checkpoints written from a DataParallel model prefix every key with
    # 'module.'; strip it so those checkpoints can be resumed here too.
    model_state = {
        (key[len('module.'):] if key.startswith('module.') else key): value
        for key, value in checkpoint['model_state_dict'].items()
    }
    unet.load_state_dict(model_state)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch'] + 1
    print(f"Resuming training from epoch {start_epoch}")
else:
    start_epoch = 0

optimizer.zero_grad()
loss_function = nn.L1Loss()

# Training loop
for epoch in range(start_epoch, epochs):
    print(f"Epoch {epoch + 1}/{epochs}")

    for step, batch in enumerate(dataloader_train):
        # batch = (noisy roll, clean roll, noise step)
        batch_roll = batch[0].cuda()
        batch_time = batch[2].cuda()

        predicted_x0 = unet(batch_roll, batch_time)

        loss = loss_function(predicted_x0.float(), batch[1].float().cuda())
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

    # Log the loss (of the last batch) at the end of each epoch
    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

    # Save a checkpoint at the end of each epoch
    checkpoint = {
        'epoch': epoch,
        'model_state_dict': unet.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss.item(),
    }
    torch.save(checkpoint, checkpoint_path)
    # Every 5 epochs, offer the checkpoint for download (Colab only).
    if (epoch+1)%5==0 and files is not None:
      files.download(checkpoint_path)

print("Training complete.")