# Forward Diffusion Sanity Check

Visualize how noise progressively destroys protein structure.

**Expected behavior:**
- t=0: Clear protein structure
- t=T/2: Noisy but recognizable
- t=T: Random spherical cloud

In [None]:
from pathlib import Path

import numpy as np
import torch
import py3Dmol

from src.data_cath import get_one_chain
from src.diffusion import DiffusionSchedule
from src.geom import center, ca_bond_lengths, radius_of_gyration, compute_ca_dihedrals
from src.pdb_io import ca_to_pdb_str

In [None]:
# Read one chain (identifier, sequence, CA coordinates) from the dataset file
chain_path = Path("../data/chain_set.jsonl")
name, seq, ca_coords = get_one_chain(chain_path)
n_residues = len(seq)
print(f"Loaded {name}, {n_residues} residues")

# Run the coordinates through center(), keep its first return value,
# and wrap that array as a float32 torch tensor
ca_centered, _ = center(ca_coords)
x0 = torch.from_numpy(ca_centered).float()
print(f"Centered at origin, shape: {x0.shape}")

In [None]:
# Create diffusion schedule
T = 1000
schedule = DiffusionSchedule(T=T, kind="linear")

# Probe the first, middle and last timestep. The indices are derived from T
# (rather than the literals 500 / 999) so that editing T keeps them in range,
# assuming alpha_bars holds one entry per timestep.
t_mid, t_last = T // 2, T - 1
label_width = len(f"t={t_last}:")  # pad shorter labels so the values line up

print(f"Diffusion Schedule (T={T}):")
print(f"  beta range: [{schedule.betas[0]:.6f}, {schedule.betas[-1]:.6f}]")
for t_probe, decimals in ((0, 4), (t_mid, 4), (t_last, 6)):
    alpha_bar = float(schedule.alpha_bars[t_probe])
    label = f"t={t_probe}:"
    # Report the signal fraction from the schedule itself instead of a
    # hand-typed percentage that goes stale if T or the schedule kind changes.
    print(f"  alpha_bar at {label:<{label_width}} {alpha_bar:.{decimals}f} (~{alpha_bar:.0%} signal)")

In [None]:
# Draw one q_sample output per checkpoint timestep
torch.manual_seed(42)  # seed torch's global RNG before any sampling happens

timesteps = [0, 250, 500, 750, 999]
x0_batch = x0.unsqueeze(0)  # add a leading axis of size 1 before calling q_sample

noisy_coords = {}
for t in timesteps:
    # q_sample returns a pair; only the first element is kept here
    sample, _ = schedule.q_sample(x0_batch, torch.tensor([t]))
    noisy_coords[t] = sample.squeeze(0).numpy()

print("Generated noisy structures at timesteps:", timesteps)

In [None]:
# Tabulate simple geometry statistics for every sampled timestep
header = f"{'t':>6} {'alpha_bar':>10} {'Rg':>8} {'CA-CA':>8} {'Dihedral std':>12}"
print(header)
print("-" * 50)

for t in timesteps:
    coords = noisy_coords[t]
    alpha_bar = schedule.alpha_bars[t].item()
    rg = radius_of_gyration(coords)
    mean_bond = ca_bond_lengths(coords).mean()
    # Plain (non-circular) standard deviation of the dihedrals after np.degrees
    dihedral_std = np.degrees(compute_ca_dihedrals(coords)).std()

    row = f"{t:>6} {alpha_bar:>10.4f} {rg:>8.2f} {mean_bond:>8.2f} {dihedral_std:>12.1f}"
    print(row)

## Visualize Noising Process

Watch the protein structure get progressively destroyed by noise.

In [None]:
def show_structure(coords, title="", width=400, height=300):
    """Show a coordinate set in a py3Dmol viewer using the cartoon style.

    Args:
        coords: Coordinates passed unchanged to ``ca_to_pdb_str``.
        title: Text printed immediately before the viewer is shown.
        width: Viewer width forwarded to ``py3Dmol.view``.
        height: Viewer height forwarded to ``py3Dmol.view``.

    Returns:
        Whatever ``view.show()`` returns.
    """
    pdb_text = ca_to_pdb_str(coords)

    viewer = py3Dmol.view(width=width, height=height)
    viewer.addModel(pdb_text, "pdb")
    cartoon_style = {"cartoon": {"color": "spectrum"}}
    viewer.setStyle(cartoon_style)
    viewer.zoomTo()

    print(title)
    return viewer.show()

In [None]:
# Checkpoint t=0: the sample labelled as the original structure
title_t0 = f"t=0 (alpha_bar={schedule.alpha_bars[0]:.4f}) - Original"
show_structure(noisy_coords[0], title_t0)

In [None]:
# Checkpoint t=250: first noisy sample
title_t250 = f"t=250 (alpha_bar={schedule.alpha_bars[250]:.4f}) - Slightly noisy"
show_structure(noisy_coords[250], title_t250)

In [None]:
# t=500: midpoint of the schedule. This is halfway through the timesteps,
# not half of the signal; the actual alpha_bar is printed in the title.
show_structure(noisy_coords[500], f"t=500 (alpha_bar={schedule.alpha_bars[500]:.4f}) - Schedule midpoint")

In [None]:
# Checkpoint t=750: three quarters of the way through the timesteps
title_t750 = f"t=750 (alpha_bar={schedule.alpha_bars[750]:.4f}) - Mostly noise"
show_structure(noisy_coords[750], title_t750)

In [None]:
# Checkpoint t=999: the last sampled timestep
title_t999 = f"t=999 (alpha_bar={schedule.alpha_bars[999]:.6f}) - Pure noise"
show_structure(noisy_coords[999], title_t999)

## Summary

If everything looks right:
- t=0 shows clear protein backbone
- Structure progressively becomes more chaotic
- t=999 is a random cloud with no protein-like features

**Next step:** Train a model to reverse this process (predict the noise and subtract it).