In [None]:
!pip install datasets
!pip install diffusers accelerate transformers
!pip install tensorboard

In [14]:
from datasets import Dataset
import torch
from torch.utils.data import DataLoader
from diffusers import UNet2DModel, DDPMScheduler, DiffusionPipeline
from diffusers.training_utils import EMAModel
from accelerate import Accelerator
from tqdm import tqdm
import numpy as np
import pickle
import os
from torch.utils.tensorboard import SummaryWriter
from torch.cuda.amp import autocast, GradScaler


In [None]:
# === Load dataset ===
# NOTE: unpickling executes arbitrary code; only load "specs_512.p" from a trusted source.
# The context manager closes the file handle as soon as the object has been read.
with open("specs_512.p", "rb") as specs_file:
    dataset = pickle.load(specs_file)
# Cast to float32 (assumes the pickle holds a NumPy array of spectrograms — TODO confirm).
dataset = dataset.astype(np.float32)

In [16]:
# Give every spectrogram a leading channel axis so each sample is [1, H, W]
# (presumably [1, 512, 512], going by the "specs_512" file name and sample_size=512
# used for the model — TODO confirm), then wrap the records in a Hugging Face Dataset.
dataset = Dataset.from_list(
    [{"spectrogram": torch.from_numpy(spec)[None, ...]} for spec in dataset]
)

In [17]:
# Wrap the torch-formatted dataset in a DataLoader: one sample per batch,
# iterated in stored order (shuffling is off). Note that `dataset` now names the loader.
dataset = DataLoader(
    dataset=dataset.with_format("torch"),
    shuffle=False,
    batch_size=1,
)

In [18]:
# === Define UNet and Diffusion Scheduler ===
# Per-block output channel counts (one entry per down/up block).
unet_channels = (128, 128, 256, 256, 512)
# Down path: the third and fifth blocks are the attention variants.
unet_down_blocks = (
    "DownBlock2D",
    "DownBlock2D",
    "AttnDownBlock2D",
    "DownBlock2D",
    "AttnDownBlock2D",
)
# Up path: the first and third blocks are the attention variants.
unet_up_blocks = (
    "AttnUpBlock2D",
    "UpBlock2D",
    "AttnUpBlock2D",
    "UpBlock2D",
    "UpBlock2D",
)
# Single-channel in, single-channel out UNet configured for sample_size=512.
model = UNet2DModel(
    sample_size=512,
    in_channels=1,
    out_channels=1,
    layers_per_block=2,
    block_out_channels=unet_channels,
    down_block_types=unet_down_blocks,
    up_block_types=unet_up_blocks,
)
# DDPM noise schedule with 1000 training timesteps.
noise_scheduler = DDPMScheduler(num_train_timesteps=1000)

In [19]:
# === Training Setup ===
accelerator = Accelerator()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-4)
# `dataset` is the DataLoader built earlier; prepare() hands back the versions of the
# model, optimizer and loader that the training loop should use from here on.
model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataset)

In [20]:
# Directory that receives one checkpoint file per epoch.
checkpoint_dir = 'checkpoints/'
# Saving into a missing directory fails, so make sure it exists before training starts.
os.makedirs(checkpoint_dir, exist_ok=True)

In [21]:
# TensorBoard logger for the training run; event files are written under this directory.
tensorboard_log_dir = "runs/ddpm-spectogram-512"
writer = SummaryWriter(log_dir=tensorboard_log_dir)

In [13]:
# === Training Loop ===
# Noise-prediction objective: corrupt each clean batch at a random timestep and
# train the model to recover the noise that was added (MSE against that noise).
model.train()
num_epochs = 10
global_step = 0
# Read the step count from the scheduler config; accessing it directly on the
# scheduler object triggers the "direct config name access" deprecation warning.
num_train_timesteps = noise_scheduler.config.num_train_timesteps
# Placeholder recorded in the checkpoint if the dataloader yields no batches.
loss_value = float("nan")
try:
    for epoch in range(num_epochs):
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}")
        for batch in progress_bar:
            clean_spectrograms = batch["spectrogram"]  # shape: [B, 1, H, W]

            noise = torch.randn_like(clean_spectrograms)
            # One independently sampled timestep per item in the batch.
            timesteps = torch.randint(
                0,
                num_train_timesteps,
                (clean_spectrograms.shape[0],),
                device=clean_spectrograms.device,
            ).long()

            noisy_spectrograms = noise_scheduler.add_noise(clean_spectrograms, noise, timesteps)
            noise_pred = model(noisy_spectrograms, timesteps).sample

            loss = torch.nn.functional.mse_loss(noise_pred, noise)
            accelerator.backward(loss)
            optimizer.step()
            optimizer.zero_grad()

            # Pull the scalar off the device once and reuse it for every consumer.
            loss_value = loss.item()
            writer.add_scalar("Loss/train", loss_value, global_step)
            global_step += 1

            progress_bar.set_postfix(loss=loss_value)
        # Save checkpoint after each epoch
        checkpoint_path = os.path.join(checkpoint_dir, f"checkpoint_epoch_{epoch+1}.pt")
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss_value,
        }, checkpoint_path)
        print(f"Checkpoint saved at {checkpoint_path}")
finally:
    # Close the writer even when training is interrupted so logged scalars are not lost.
    writer.close()

  deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False)
Epoch 1:   1%|          | 5/720 [07:52<18:46:49, 94.56s/it, loss=0.0739]


KeyboardInterrupt: 

In [None]:
def inference(checkpoint_path, input_spectrograms):
    """Run one forward pass of the module-level ``model`` on noised spectrograms.

    The checkpoint's ``'model_state_dict'`` is loaded into the global ``model``, which
    is put in eval mode and moved to CUDA when available (CPU otherwise). The inputs
    are noised at independently sampled random timesteps with a fresh
    ``DDPMScheduler(num_train_timesteps=1000)`` and passed through the model once.

    NOTE(review): the training loop in this notebook fits the model output to the
    added noise, so the returned tensor is presumably the predicted noise rather than
    a denoised spectrogram — confirm before treating it as a clean reconstruction.

    Args:
        checkpoint_path: Path to a checkpoint holding a ``'model_state_dict'`` entry.
        input_spectrograms: Array-like of shape (batch_size, 512, 512); it is copied
            and converted to float32, and a channel axis is inserted at dim 1.

    Returns:
        The model's ``.sample`` output for the batch, on the selected device.

    Side effects:
        Overwrites the weights of the global ``model``, leaves it in eval mode on the
        selected device, and prints the output shape.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # map_location lets a checkpoint written on a GPU machine load on a CPU-only one.
    # torch.load unpickles the file, so only load checkpoints from a trusted source.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()

    # Same schedule length as the scheduler used for training.
    noise_scheduler = DDPMScheduler(num_train_timesteps=1000)

    # === Prepare Input ===
    # np.array copies, so the caller's data is never aliased by the tensor built below.
    input_array = np.array(input_spectrograms, dtype=np.float32)
    input_tensor = torch.from_numpy(input_array).unsqueeze(1).to(device)  # [B, 1, H, W]

    # === Perform Inference ===
    with torch.no_grad():  # no gradients are needed for a forward-only pass
        # One random timestep per input; read from config to avoid the deprecated
        # direct attribute access on the scheduler.
        timesteps = torch.randint(
            0,
            noise_scheduler.config.num_train_timesteps,
            (input_tensor.shape[0],),
            device=device,
        ).long()

        # Noise the inputs the same way the training loop does.
        noise = torch.randn_like(input_tensor)
        noisy_spectrograms = noise_scheduler.add_noise(input_tensor, noise, timesteps)

        output = model(noisy_spectrograms, timesteps).sample

        print(f"Inference Output Shape: {output.shape}")
    return output

In [None]:
# Epoch-10 checkpoint to load for inference (path presumably points at a mounted
# Google Drive folder — adjust to wherever the checkpoints actually live).
checkpoint_path = "/content/drive/MyDrive/aud/checkpoints/checkpoint_epoch_10.pt"

In [None]:
# On a top-to-bottom run `dataset` is the DataLoader built earlier, which cannot be
# indexed directly; fetch sample 50 from its underlying torch-formatted dataset instead.
# inference() expects a NumPy array shaped (batch_size, 512, 512).
temp = dataset.dataset[50]["spectrogram"].numpy().reshape(1, 512, 512)

In [None]:
# Run the single-sample batch through the checkpointed model.
output = inference(checkpoint_path=checkpoint_path, input_spectrograms=temp)

In [None]:
# Display the tensor returned by inference().
output

In [None]:
# Display the shape of the inference result.
output.shape

In [None]:
# Display the smallest value in the inference result.
output.min()

In [None]:
# Squeeze axis 0 twice (each call only removes it when its size is 1),
# then bring the result to host memory as a NumPy array.
squeezed_output = output.squeeze(0)
squeezed_output = squeezed_output.squeeze(0)
output_numpy = squeezed_output.cpu().numpy()

In [None]:
# Display the minimum of the converted NumPy array.
output_numpy.min()

In [None]:
# Persist the result; the context manager guarantees the file is flushed and closed.
with open("inference.p", "wb") as inference_file:
    pickle.dump(output_numpy, inference_file)

In [None]:
from torch.utils.tensorboard import SummaryWriter

In [None]:
# Create a TensorBoard writer that logs into the checkpoints folder
# (this rebinds the `writer` name).
checkpoints_log_dir = "/content/drive/MyDrive/aud/checkpoints/"
writer = SummaryWriter(log_dir=checkpoints_log_dir)