In [1]:
from datasets import Dataset
import torch
from torch.utils.data import DataLoader
from diffusers import UNet2DModel, DDPMScheduler, DiffusionPipeline
from diffusers.training_utils import EMAModel
from accelerate import Accelerator
from tqdm import tqdm
import numpy as np
import pickle
import os
from torch.utils.tensorboard import SummaryWriter
from torch.cuda.amp import autocast, GradScaler


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# === UNet backbone and DDPM noise schedule ===
# Single-channel (1 in / 1 out) UNet for 256x256 inputs with five resolution
# levels. Self-attention is used at the third and fifth down levels; the up
# path mirrors the down path in reverse order.
model = UNet2DModel(
    sample_size=256,
    in_channels=1,
    out_channels=1,
    block_out_channels=(128, 128, 256, 256, 512),
    layers_per_block=2,
    down_block_types=(
        "DownBlock2D",
        "DownBlock2D",
        "AttnDownBlock2D",
        "DownBlock2D",
        "AttnDownBlock2D",
    ),
    up_block_types=(
        "AttnUpBlock2D",
        "UpBlock2D",
        "AttnUpBlock2D",
        "UpBlock2D",
        "UpBlock2D",
    ),
)

# DDPM schedule with 1000 training timesteps.
noise_scheduler = DDPMScheduler(num_train_timesteps=1000)

In [5]:
def inference(checkpoint_path, input_spectrograms):
    """Run one noising + forward pass of the global ``model`` on a batch of spectrograms.

    The weights stored in ``checkpoint_path`` are loaded into the module-level
    ``model``. Each input spectrogram is then corrupted with Gaussian noise at
    a randomly drawn timestep and passed through the network once.

    Args:
        checkpoint_path: Path to a checkpoint readable by ``torch.load`` whose
            ``'model_state_dict'`` entry holds the UNet weights.
        input_spectrograms: Array-like of shape ``(batch_size, H, W)``. It is
            cast to float32 and a channel axis is inserted, giving
            ``(batch_size, 1, H, W)``. The notebook calls this with 256x256
            spectrograms, matching the model's ``sample_size=256``.

    Returns:
        torch.Tensor: The ``.sample`` field of the model output, on the CPU.
        Its shape is also printed.

    Raises:
        ValueError: If ``input_spectrograms`` is not 3-dimensional.

    Notes:
        * This is a single forward pass at a random noise level, not an
          iterative reverse-diffusion sampling loop.
        * Timesteps and noise are drawn without a fixed seed, so the result
          differs from call to call.
        * NOTE(review): with the scheduler's default epsilon parameterization
          the network output would be the predicted *noise*, not a clean
          spectrogram -- confirm against how the checkpoint was trained.
        * Side effect: the global ``model`` is mutated (weights replaced,
          moved to CPU, switched to eval mode).
    """
    # Inference is pinned to the CPU.
    device = torch.device("cpu")

    # map_location lets a checkpoint that was saved from a GPU run be
    # deserialized on a machine without CUDA.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()

    # Local scheduler with the same number of training timesteps as above.
    noise_scheduler = DDPMScheduler(num_train_timesteps=1000)

    # === Prepare input ===
    input_spectrograms = np.asarray(input_spectrograms).astype(np.float32)
    if input_spectrograms.ndim != 3:
        # Fail early with a clear message instead of deep inside the UNet.
        raise ValueError(
            "input_spectrograms must have shape (batch_size, H, W); "
            f"got array with shape {input_spectrograms.shape}"
        )
    # Insert the channel axis: [B, H, W] -> [B, 1, H, W].
    input_tensor = torch.from_numpy(input_spectrograms).unsqueeze(1).to(device)

    # === Forward pass ===
    with torch.no_grad():  # gradients are not needed here
        # One random timestep per sample. Reading the value through `.config`
        # avoids the "direct config name access" deprecation warning that
        # `noise_scheduler.num_train_timesteps` triggers. randint already
        # returns int64, so no extra cast is required.
        timesteps = torch.randint(
            0,
            noise_scheduler.config.num_train_timesteps,
            (input_tensor.shape[0],),
            device=device,
        )

        # Corrupt the input the same way the scheduler does during training.
        noise = torch.randn_like(input_tensor)
        noisy_spectrograms = noise_scheduler.add_noise(input_tensor, noise, timesteps)

        # Single network evaluation on the noisy input.
        output = model(noisy_spectrograms, timesteps).sample

        print(f"Inference Output Shape: {output.shape}")
    return output

In [54]:
# Checkpoint file whose weights are loaded by inference().
checkpoint_path = "checkpoints/checkpoint_epoch_1.pt"

In [55]:
# Load the pickled array from "specs_256.p" and cast it to float32.
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only open this file if it comes from a trusted source.
# The context manager closes the file handle, which the bare open() call
# used previously never did.
with open("specs_256.p", "rb") as specs_file:
    dataset = pickle.load(specs_file)
dataset = dataset.astype(np.float32)

In [56]:
# All-zero placeholder array of shape (1, 256, 256).
# NOTE(review): not referenced by any other visible cell -- confirm it is still needed.
temp = np.zeros(shape=(1, 256, 256))

In [57]:
# Run the model on the first dataset entry, reshaped to a batch of one
# 256x256 spectrogram.
output = inference(
    checkpoint_path,
    np.reshape(dataset[0], (1, 256, 256)),
)

  deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False)


Inference Output Shape: torch.Size([1, 1, 256, 256])


In [58]:
# Drop the leading batch and channel axes (when they have size 1) and
# convert the result to a NumPy array.
output_numpy = (
    output
    .cpu()
    .squeeze(0)
    .squeeze(0)
    .numpy()
)

In [60]:
# Largest value in the model output. inference() draws its noise and
# timesteps without a fixed seed, so this number changes between runs.
np.max(output_numpy)

np.float32(4.213393)