In [None]:
from datasets import Dataset
import torch
from torch.utils.data import DataLoader
from diffusers import UNet2DModel, DDPMScheduler, DiffusionPipeline
from diffusers.training_utils import EMAModel
from accelerate import Accelerator
from tqdm import tqdm
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from torch.cuda.amp import autocast, GradScaler
from pathlib import Path
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt
import os
import librosa
import seaborn as sns


In [None]:
import torch
from diffusers import UNet2DModel, DDPMScheduler, DDPMPipeline
import json

# Locations of the trained UNet weights and of the model / scheduler configs.
# Forward slashes work on every OS; the previous "unet\config.json" literal
# only resolved on Windows and depended on an invalid "\c" escape sequence.
ckpt_path           = "ddpm-spectogram-256/unet/diffusion_pytorch_model.bin"
config_path         = "ddpm-spectogram-256/unet/config.json"
scheduler_config    = "ddpm-spectogram-256/scheduler/scheduler_config.json"

# 1) Load model config & instantiate UNet2DModel
with open(config_path, "r") as f:
    unet_cfg = json.load(f)
unet = UNet2DModel(**unet_cfg)

# 2) Load scheduler config & instantiate DDPMScheduler
with open(scheduler_config, "r") as f:
    sched_cfg = json.load(f)
scheduler = DDPMScheduler(**sched_cfg)

# 3) Load weights into UNet
# NOTE(review): torch.load unpickles the file, so only point this at
# checkpoints you trust (or pass weights_only=True if your torch supports it).
state_dict = torch.load(ckpt_path, map_location="cpu")
unet.load_state_dict(state_dict)

# 4) Assemble pipeline
# Fall back to the CPU when no GPU is present instead of crashing on .to("cuda").
device = "cuda" if torch.cuda.is_available() else "cpu"
pipeline = DDPMPipeline(unet=unet, scheduler=scheduler)
pipeline.to(device)
pipeline.unet.eval()  # inference mode: disables dropout etc.

In [None]:
import os
from pathlib import Path

# Configuration
num_samples = 10      # total number of images to generate
batch_size  = 2       # images requested per pipeline call
steps       = 1000    # value passed as num_inference_steps
output_dir  = Path("generated")

# Make sure output directory exists
output_dir.mkdir(parents=True, exist_ok=True)

# Continue numbering after the highest existing "sample_<digits>.png".
# Files whose suffix is not purely numeric are ignored; `default=-1` makes the
# first index 0 both for an empty directory and when no numeric name is found
# (previously max() raised ValueError in the latter case).
existing_idxs = [
    int(suffix)
    for suffix in (p.stem.split("_")[1] for p in output_dir.glob("sample_*.png"))
    if suffix.isdigit()
]
next_idx = max(existing_idxs, default=-1) + 1

# Generate images batch by batch. The last batch is shrunk so that exactly
# `num_samples` images are produced even when it is not a multiple of
# `batch_size` (integer division used to drop the remainder silently).
all_images = []
for start in range(0, num_samples, batch_size):
    current_batch = min(batch_size, num_samples - start)
    images = pipeline(batch_size=current_batch, num_inference_steps=steps).images
    all_images.extend(images)

# Save with non-colliding filenames
for idx, img in enumerate(all_images, start=next_idx):
    img.save(output_dir / f"sample_{idx:03d}.png")

print(f"Saved {len(all_images)} images, starting at index {next_idx:03d}.")


In [None]:
# Pick the GPU when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Pretrained audio-diffusion pipeline; later cells use its `mel` component
# to turn spectrogram images back into audio.
pipe = DiffusionPipeline.from_pretrained(
    "teticio/audio-diffusion-instrumental-hiphop-256"
).to(device)

In [None]:
from IPython.display import Audio
from PIL import Image

# Turn one generated spectrogram image back into a waveform and play it.
im = Image.open(r"generated/sample_012.png")
a = pipe.mel.image_to_audio(im)

# Play at the sample rate the mel converter reports rather than a hard-coded
# number. NOTE(review): the previous 22100 looks like a typo for 22050 - confirm.
display(Audio(a, rate=pipe.mel.get_sample_rate()))

In [None]:
from IPython.display import Audio
from PIL import Image

# Second generated spectrogram, converted to audio for comparison.
im2 = Image.open(r"generated/sample_003.png")
# Convert the image loaded in this cell (im2); the earlier code passed `im`,
# so `a2` was just a second rendering of the first sample.
a2 = pipe.mel.image_to_audio(im2)

# Play at the sample rate the mel converter reports rather than a hard-coded
# number. NOTE(review): the previous 22100 looks like a typo for 22050 - confirm.
display(Audio(a2, rate=pipe.mel.get_sample_rate()))

In [None]:
# Show the spectrogram image `im` that an earlier cell opened from disk.
im

In [None]:

# Imported explicitly because older librosa releases do not expose the
# `display` submodule through a bare `import librosa`.
import librosa.display

# Use the sample rate reported by the mel converter so the time axis matches
# the audio. NOTE(review): the previous 22100 looks like a typo for 22050 - confirm.
sr = pipe.mel.get_sample_rate()

# Plot the two waveforms (`a` and `a2`) stacked in one figure.
fig, (ax_top, ax_bottom) = plt.subplots(2, 1, figsize=(12, 8))

librosa.display.waveshow(a, sr=sr, alpha=0.5, ax=ax_top)
ax_top.set(title="Original Audio", xlabel="Time", ylabel="Amplitude")

librosa.display.waveshow(a2, sr=sr, color='r', alpha=0.5, ax=ax_bottom)
ax_bottom.set(title="Reconstructed Audio", xlabel="Time", ylabel="Amplitude")

fig.tight_layout()
plt.show()