In [1]:
import sys
sys.path.append('/home/npopkov/dll24')

import h5py
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torch
torch.set_float32_matmul_precision('medium')

class LatentDataset(Dataset):
    def __init__(self, h5_file):
        self.shape = np.array(h5_file[list(h5_file.keys())[0]]).shape
        self.data = self.createData(h5_file)
        self.min = self.data.min()
        self.max = self.data.max()
        self.std = self.data.std()
        self.mean = self.data.mean()
        self.transform('normalize')


    def createData(self, h5_file):
        data = []
        for key in h5_file.keys():
        
            sample = np.array(h5_file[key])
            
            data.append(np.array(sample))
        
        return torch.tensor(np.array(data))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
    
    def transform(self, type: str = 'normalize'):
        if type == 'standardize':
            self.data = (self.data - self.mean) / self.std
        elif type == 'normalize':
            self.data = (self.data - self.min) / (self.max - self.min)
        else:
            raise ValueError('Unknown transformation type')
        
    def inverse_transform(self, data):

        return data * (self.max - self.min) + self.min
    
    def unflatten(self, data):
        return data.reshape(self.shape)
    

hdf = h5py.File('256encodesamp.hdf5', 'r')
dataset = LatentDataset(hdf)
hdf.close()

In [2]:
dataset.data.shape

torch.Size([5756, 256, 256])

In [3]:
# from models.ddpm.DDPMBase import DDPMBase
# from models.ddpm.CAUnet import CAUnet
# from models.ddpm.oneDCAUnet import oneDCAUnet


# n_steps, betaMin, betaMax = 200, 10**-6, 0.002

# #network = CAUnet(n_steps=n_steps,
# #                 c_in=256,
# #                 c_out=256,).to('cuda')

# #model = DDPMBase(network=network,
# #                betaMin=betaMin,
# #                betaMax=betaMax,
# #                n_steps=n_steps).to('cuda')


In [4]:
# from audio_diffusion_pytorch import DiffusionModel, UNetV0, VDiffusion, VSampler
# fac = 4
# model = DiffusionModel(
#     net_t=UNetV0, # The model type used for diffusion (U-Net V0 in this case)
#     in_channels=256, # U-Net: number of input/output (audio) channels
#     channels=[fac*8, fac*32, fac*64, fac*128, fac*256, fac*512, fac*512, fac*1024, fac*1024], # U-Net: channels at each layer
#     factors=[1, 4, 4, 4, 2, 2, 2, 2, 2], # U-Net: downsampling and upsampling factors at each layer
#     items=[1, 2, 2, 2, 2, 2, 2, 4, 4], # U-Net: number of repeating items at each layer
#     attentions=[1,1,1, 1, 1, 1, 1, 1, 1], # U-Net: attention enabled/disabled at each layer
#     attention_heads=16, # U-Net: number of attention heads per attention item
#     attention_features=64, # U-Net: number of attention features per attention item
#     diffusion_t=VDiffusion, # The diffusion method used
#     sampler_t=VSampler, # The diffusion sampler used
# )

In [5]:
import torch
from denoising_diffusion_pytorch import Unet, GaussianDiffusion

model = Unet(
    dim = 64,
    channels=1,
    dim_mults = (1, 2, 4, 8),
    flash_attn = True
)

diffusion = GaussianDiffusion(
    model,
    image_size = 256,
    timesteps = 1000    # number of steps
)




  from .autonotebook import tqdm as notebook_tqdm


A100 GPU detected, using flash attention if input tensor is on cuda


In [6]:
import lightning as lt

class Lightningwrapper(lt.LightningModule):
    def __init__(self, model):
        super().__init__()
        self.model = model
    def forward(self, x):
        return self.model(x)
    def training_step(self, batch, batch_idx):
        batch = batch.unsqueeze(1)
        loss = self.model(batch)
        self.log('train_loss', loss)
        return loss
    def configure_optimizers(self):
        return torch.optim.AdamW(self.model.parameters(), lr=3e-4)

lt_model = Lightningwrapper(diffusion)
dataloader = DataLoader(dataset, batch_size=64,num_workers=30,shuffle=True,)
# Train model with audio waveforms
trainer = lt.Trainer(max_epochs=50)
trainer.fit(lt_model, dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type              | Params | Mode 
----------------------------------------------------
0 | model | GaussianDiffusion | 35.7 M | train
----------------------------------------------------
35.7 M    Trainable params
0         Non-trainable params
35.7 M    Total params
142.820   Total estimated model params size (MB)
378       Modules in train mode
0         Modules in eval mode


Epoch 0:   0%|          | 0/90 [00:00<?, ?it/s] 

OutOfMemoryError: CUDA out of memory. Tried to allocate 1024.00 MiB. GPU 0 has a total capacty of 23.65 GiB of which 949.69 MiB is free. Including non-PyTorch memory, this process has 22.71 GiB memory in use. Of the allocated memory 21.36 GiB is allocated by PyTorch, and 909.25 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# after a lot of training

sampled_images = diffusion.sample(batch_size = 4)
sampled_images.shape # (4, 3, 128, 128)