# StyleGAN2-ADA Training — Latent Resonance Spectrograms (Kaggle)

Train StyleGAN2-ADA on 512×512 grayscale spectrogram images.
ADA (Adaptive Discriminator Augmentation) is purpose-built for limited-data regimes,
making it a better fit than StyleGAN3 for small datasets (435–489 images).

**Setup:** In the Kaggle sidebar, go to **Settings → Accelerator → GPU T4 x2**. Training below uses a single GPU by default (`GPUS = 1` in section 5).

**Dataset:** Upload your `spectrograms.zip` as a [Kaggle Dataset](https://www.kaggle.com/datasets),
then add it to this notebook via **Add data** in the sidebar.

## 1. Setup & GPU Check

In [None]:
# Show the attached GPU(s) and install ninja.
# NOTE(review): ninja is presumably needed to build the repo's custom CUDA ops
# in section 2 — confirm.
!nvidia-smi
!pip install -q ninja

import torch
# Stop here if torch cannot see a CUDA device; later cells call torch.cuda directly.
assert torch.cuda.is_available(), "No GPU — enable it in Settings → Accelerator → GPU T4 x2"
print(f"PyTorch {torch.__version__}, CUDA {torch.version.cuda}, GPU: {torch.cuda.get_device_name(0)}")

## 2. Clone StyleGAN2-ADA & Apply Patches

In [None]:
import os
import sys
import pathlib
import shutil
import subprocess

# Fresh clone
if os.path.exists("stylegan2-ada-pytorch"):
    shutil.rmtree("stylegan2-ada-pytorch")
!git clone https://github.com/NVlabs/stylegan2-ada-pytorch.git
sys.path.insert(0, "stylegan2-ada-pytorch")

# ── Patch 1: Fix InfiniteSampler for PyTorch ≥2.4 ────────────────────────
# Rewrite torch_utils/misc.py in place so the base-class constructor is called
# without the dataset argument.
misc_path = pathlib.Path("stylegan2-ada-pytorch/torch_utils/misc.py")
misc_path.write_text(
    misc_path.read_text().replace("super().__init__(dataset)", "super().__init__()")
)
print(f"Patched {misc_path}: InfiniteSampler fix")

# ── Patch 2: Fix Adam betas int → float for PyTorch ≥2.9 ────────────────
# Rewrite train.py in place so the first beta is written as a float literal.
train_path = pathlib.Path("stylegan2-ada-pytorch/train.py")
train_path.write_text(
    train_path.read_text().replace("betas=[0,0.99]", "betas=[0.0,0.99]")
)
print(f"Patched {train_path}: Adam betas fix")

# ── Patch 3: Try CUDA ops compilation ────────────────────────────────────
# Point the extension build at this GPU's compute capability and at a freshly
# emptied extensions directory, so nothing from an earlier build is reused.
cc_major, cc_minor = torch.cuda.get_device_capability(0)
arch = f"{cc_major}.{cc_minor}"
os.environ["TORCH_CUDA_ARCH_LIST"] = arch
os.environ["TORCH_EXTENSIONS_DIR"] = "/tmp/torch_extensions"
if os.path.exists("/tmp/torch_extensions"):
    shutil.rmtree("/tmp/torch_extensions")

# Probe the build in a child process so a failure cannot disturb this kernel.
# sys.executable makes the probe use the same interpreter as the notebook;
# a bare "python" could resolve to a different installation on PATH.
try:
    result = subprocess.run(
        [sys.executable, "-c",
         "import sys; sys.path.insert(0,'stylegan2-ada-pytorch'); "
         "from torch_utils.ops import bias_act; "
         "assert bias_act._init(), 'init failed'"],
        capture_output=True, text=True, timeout=180,
    )
except subprocess.TimeoutExpired:
    # A build that exceeds the time limit is handled like a failed build
    # instead of aborting the cell with an uncaught exception.
    CUDA_OPS_OK = False
    probe_error = "compilation timed out after 180 s"
else:
    CUDA_OPS_OK = result.returncode == 0
    probe_error = result.stderr

if CUDA_OPS_OK:
    print(f"Custom CUDA ops compiled for sm_{cc_major}{cc_minor} — using fused kernels")
else:
    print(f"CUDA ops compilation failed (arch {arch}), using native PyTorch fallback")
    print(f"  Error: ...{probe_error[-300:]}")
    # Insert an unconditional `return False` as the first statement of each
    # module's _init().
    # NOTE(review): presumably this makes the repo skip the plugin build and
    # use its native PyTorch code path, as the message above says — confirm.
    ops_dir = pathlib.Path("stylegan2-ada-pytorch/torch_utils/ops")
    for name in ["bias_act.py", "upfirdn2d.py"]:
        p = ops_dir / name
        s = p.read_text()
        s = s.replace("def _init():", "def _init():\n    return False")
        p.write_text(s)
        print(f"  Patched {p}")

## 3. Load Dataset

Kaggle datasets are mounted at `/kaggle/input/<dataset-name>/`.

Set `KAGGLE_DATASET` to match your dataset name.

In [None]:
import os
import glob
import shutil

KAGGLE_DATASET = "spectrograms"  # <-- your Kaggle dataset name

input_dir = f"/kaggle/input/{KAGGLE_DATASET}"

# Find PNGs (may be in root or a subfolder); sorted so the copy order is stable.
pngs = sorted(glob.glob(f"{input_dir}/**/*.png", recursive=True))
if not pngs:
    # Fail here with a clear message rather than later inside dataset_tool.py.
    raise FileNotFoundError(
        f"No PNG files found under {input_dir} — check KAGGLE_DATASET and "
        "that the dataset was added via Add data"
    )

# Copy to a writable working directory (Kaggle input is read-only)
DATASET_PATH = "/kaggle/working/spectrograms"
os.makedirs(DATASET_PATH, exist_ok=True)

# The copy flattens subfolders into one directory, so two source files with
# the same basename would overwrite each other. Give later duplicates a
# numeric suffix so every source image is kept.
used_names = set()
for src_png in pngs:
    stem, ext = os.path.splitext(os.path.basename(src_png))
    dest_name = stem + ext
    suffix = 1
    while dest_name in used_names:
        dest_name = f"{stem}_{suffix}{ext}"
        suffix += 1
    used_names.add(dest_name)
    shutil.copy(src_png, os.path.join(DATASET_PATH, dest_name))

print(f"Found {len(pngs)} PNG files → copied to {DATASET_PATH}")

## 4. Prepare Dataset

In [None]:
# Run the repo's dataset_tool.py on the copied PNG folder and write
# ./spectrograms.zip, the archive the training command reads via --data.
!python stylegan2-ada-pytorch/dataset_tool.py \
    --source={DATASET_PATH} \
    --dest=./spectrograms.zip

## 5. Configure Training

In [None]:
import os
# Allocator setting exported through the process environment.
# NOTE(review): presumably picked up by the training process launched in
# section 6 — confirm for the installed PyTorch version.
os.environ["PYTORCH_ALLOC_CONF"] = "expandable_segments:True"

# Hardware and batch size (passed as --gpus / --batch).
GPUS = 1              # single GPU more stable for small datasets
BATCH_SIZE = 8

# Schedule (passed as --kimg / --snap); RESUME adds --resume only when non-empty.
KIMG = 5000
SNAP = 10
RESUME = ""

# Regularisation and augmentation (passed as --gamma / --aug / --target / --mirror).
GAMMA = 10.0          # higher R1 regularization for small datasets
AUG = "ada"           # adaptive discriminator augmentation
TARGET = 0.6          # ADA target heuristic — good default for small datasets
MIRROR = False

# Metrics selection (passed as --metrics).
METRICS = "none"

print(f"Config: gpus={GPUS}, batch={BATCH_SIZE}, gamma={GAMMA}, aug={AUG}, target={TARGET}, mirror={MIRROR}")

## 6. Train

In [None]:
import torch
torch.cuda.empty_cache()

resume_flag = f"--resume={RESUME}" if RESUME else ""
mirror_int = 1 if MIRROR else 0

!python stylegan2-ada-pytorch/train.py \
    --outdir=./training-runs \
    --cfg=auto \
    --data=./spectrograms.zip \
    --gpus={GPUS} \
    --batch={BATCH_SIZE} \
    --gamma={GAMMA} \
    --snap={SNAP} \
    --kimg={KIMG} \
    --aug={AUG} \
    --target={TARGET} \
    --mirror={mirror_int} \
    --metrics={METRICS} \
    {resume_flag}

## 7. Generate Samples

In [None]:
import glob
import pickle

import matplotlib.pyplot as plt
import torch

# Use the snapshot whose path sorts last among all .pkl files under training-runs/.
pkls = sorted(glob.glob("training-runs/**/*.pkl", recursive=True))
assert pkls, "No snapshots found — has training completed at least one snapshot?"
latest_pkl = pkls[-1]
print(f"Loading {latest_pkl}")

# NOTE(review): pickle.load can execute arbitrary code from the file — only
# load snapshots produced by your own training runs.
# NOTE(review): unpickling presumably needs the cloned repo on sys.path
# (added in section 2) — confirm if running this cell in a fresh kernel.
with open(latest_pkl, "rb") as f:
    G = pickle.load(f)["G_ema"].cuda().eval()

NUM_SAMPLES = 5
z = torch.randn(NUM_SAMPLES, G.z_dim, device="cuda")
with torch.no_grad():
    imgs = G(z, None)

# squeeze=False keeps `axes` a 2-D array for every NUM_SAMPLES; without it a
# single sample returns a bare Axes object and the loop below fails.
fig, axes = plt.subplots(1, NUM_SAMPLES, figsize=(20, 4), squeeze=False)
for i, ax in enumerate(axes[0]):
    # Channel 0 of sample i, moved to the CPU for plotting.
    img = imgs[i, 0].cpu().numpy()
    ax.imshow(img, cmap="magma", aspect="auto")
    ax.set_title(f"Sample {i}")
    ax.axis("off")
plt.suptitle("Generated Spectrograms (StyleGAN2-ADA)")
plt.tight_layout()
plt.show()

## 8. Reconstruct Audio from Generated Spectrograms

Treat each generated image as a mel spectrogram in decibels, invert it to a linear-frequency STFT magnitude, and then use Griffin-Lim phase estimation to turn it into an audio waveform.

In [None]:
!pip install -q librosa soundfile

import numpy as np
import librosa
import soundfile as sf
import IPython.display as ipd

# Reconstruction parameters.
# NOTE(review): these presumably have to match the settings used to render the
# training spectrograms — confirm against the dataset generation code.
SR = 22050
N_FFT = 2048
HOP_LENGTH = 512
N_MELS = 512        # not passed to any call below; kept for reference
N_ITER = 32
DB_RANGE = 80.0

output_dir = "/kaggle/working/reconstructed_audio"
os.makedirs(output_dir, exist_ok=True)

for i in range(NUM_SAMPLES):
    # Channel 0 of sample i as a numpy array. The mapping below assumes values
    # in [-1, 1]; nothing in this notebook bounds the generator output, so
    # clip to keep the dB values inside [-DB_RANGE, 0].
    # NOTE(review): assumes row 0 is the lowest mel band — confirm the image
    # orientation used when the dataset was rendered.
    spec = np.clip(imgs[i, 0].cpu().numpy(), -1.0, 1.0)

    # [-1, 1] → dB → power → linear STFT → Griffin-Lim
    S_db = (spec + 1.0) * (DB_RANGE / 2.0) - DB_RANGE
    S_power = librosa.db_to_power(S_db, ref=1.0)
    S_stft = librosa.feature.inverse.mel_to_stft(S_power, sr=SR, n_fft=N_FFT, power=2.0)
    audio = librosa.griffinlim(S_stft, n_iter=N_ITER, hop_length=HOP_LENGTH, n_fft=N_FFT)

    # Normalise to -1 dBFS peak (skipped for an all-zero signal)
    peak = np.abs(audio).max()
    if peak > 0:
        audio = audio / peak * 10 ** (-1.0 / 20.0)

    # Save WAV
    wav_path = f"{output_dir}/sample_{i}.wav"
    sf.write(wav_path, audio, SR)
    print(f"Sample {i}: {len(audio)} samples ({len(audio)/SR:.2f}s) → {wav_path}")

    # Inline audio player
    ipd.display(ipd.Audio(audio, rate=SR))

## 9. Save Results

Kaggle persists everything in `/kaggle/working/` as notebook output.
Click **Save Version** (top right) → `training-runs.zip` and `reconstructed_audio.zip` will be available under **Output**.

In [None]:
import os
import shutil

# (root_dir, folder) pairs to archive. training-runs is written relative to
# the current directory by the training command, while the audio cell writes
# to an absolute path under /kaggle/working, so each folder is archived from
# the directory it actually lives in.
archive_sources = [
    (".", "training-runs"),
    ("/kaggle/working", "reconstructed_audio"),
]

created_any = False
for root_dir, folder in archive_sources:
    if not os.path.isdir(os.path.join(root_dir, folder)):
        # Do not report an archive for a folder that was never produced.
        print(f"Skipping {folder}: directory not found (was that section run?)")
        continue
    archive_path = shutil.make_archive(f"/kaggle/working/{folder}", "zip", root_dir, folder)
    print(f"Created {archive_path}")
    created_any = True

if created_any:
    print("These will be saved as notebook output when you click Save Version.")