In [None]:
# Multiflux 256^3 - CPU-optimized notebook
# License: CC BY-NC-SA 4.0
# Paste this block into a cell (or split into logical cells at comments).

# --- Imports and basic setup ---
import os, time, gc
import numpy as np
from scipy.fft import fftn, ifftn, fftfreq
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import json

print('Environment ready')

# --- Parameters (edit if needed) ---
# Domain / resolution
N = 256                               # grid points per axis (full generation)
L = 2 * np.pi                         # domain edge length; grid spacing is L/N
# Analysis settings
DS = 4                                # downsample factor for invariants (256/4=64 -> 262k samples)
K_CLUSTERS = 12                       # number of KMeans clusters
# Reproducibility
SEED = 42                             # seeds NumPy's global RNG and KMeans
# Output / reporting
OUTDIR = 'output_multiflux_256_cpu'   # directory that receives every saved artefact
SAVE_PLOTS = True
VERBOSE = True                        # log() prints only when this is truthy

# Seed the legacy global NumPy RNG and make sure the output directory exists.
np.random.seed(SEED)
os.makedirs(OUTDIR, exist_ok=True)

def log(msg):
    """Print `msg` prefixed with the current HH:MM:SS time.

    Does nothing when the module-level VERBOSE flag is falsy.
    """
    if not VERBOSE:
        return
    stamp = time.strftime('%H:%M:%S')
    print(f"[{stamp}] {msg}")

# --- 1) Build k-grid ---
log(f"Building k-grid for N={N}")
# Angular wavenumbers along each axis. The grid is cubic with the same
# spacing L/N in every direction, so one axis is computed and copied.
kx = 2 * np.pi * fftfreq(N, d=L/N)
ky = kx.copy()
kz = kx.copy()
KX, KY, KZ = np.meshgrid(kx, ky, kz, indexing='ij')
K2 = KX**2 + KY**2 + KZ**2
# Flag the non-zero modes BEFORE regularising K2. Taking the mask after
# adding the epsilon makes it True everywhere, so the k=0 mode is never
# excluded and later picks up a factor K**(-5/6) ~ 1e12.
mask = K2 > 0
# Tiny offset so that divisions by K2 at k=0 stay finite.
K2 += 1e-30
K = np.sqrt(K2)

# --- 2) Generate random spectral fields (memory-aware) ---
log("Generating random fields and spectral transforms (sequential to limit peak memory)")

shape = (N, N, N)

# Generate and FFT each component in turn, dropping the real-space noise
# before drawing the next one. The u -> v -> w order determines how the
# seeded global RNG stream is consumed, so keep it.
u = np.random.randn(*shape)
u_hat = fftn(u)
del u; gc.collect()

v = np.random.randn(*shape)
v_hat = fftn(v)
del v; gc.collect()

w = np.random.randn(*shape)
w_hat = fftn(w)
del w; gc.collect()

# apply spectral scaling (approximate kolmogorov): multiply by K**(-5/6)
log("Applying spectral scaling")
scale = np.where(mask, K**(-5/6), 0.0)
# Force the k=0 (mean-flow) amplitude to exactly zero. K at that index holds
# only the tiny regulariser added to K2, so K**(-5/6) is of order 1e12 there;
# if it leaked through it would swamp the field with a huge uniform velocity.
# The projection below cannot remove it because KX=KY=KZ=0 at k=0.
scale[0, 0, 0] = 0.0
u_hat *= scale
v_hat *= scale
w_hat *= scale

# solenoidal projection in spectral space: u_hat -= k (k . u_hat) / |k|^2
log("Applying solenoidal projection")
k_dot = KX * u_hat + KY * v_hat + KZ * w_hat
# (k . u_hat)/|k|^2 is shared by all three components: divide once, in place,
# instead of allocating the same N^3 complex quotient three times.
k_dot /= K2
u_hat -= KX * k_dot
v_hat -= KY * k_dot
w_hat -= KZ * k_dot
del k_dot; gc.collect()

# inverse transform to physical space (free spectral arrays early)
log("IFFT to physical space")
# `.real` of a complex array is a strided view that keeps the whole complex
# buffer alive; `.copy()` leaves each component as a compact float array and
# lets the complex IFFT result be released.
u = ifftn(u_hat).real.copy(); del u_hat; gc.collect()
v = ifftn(v_hat).real.copy(); del v_hat; gc.collect()
w = ifftn(w_hat).real.copy(); del w_hat; gc.collect()

KE_mean = 0.5 * np.mean(u*u + v*v + w*w)
log(f"Mean kinetic energy density: {KE_mean:.6e}")

# save raw velocities (compressed)
np.savez_compressed(os.path.join(OUTDIR, 'velocity_256_cpu.npz'), u=u, v=v, w=w)
log("Saved velocity_256_cpu.npz")

# --- 3) Gradients via FFT (spectral differentiation) ---
log("Computing gradients (spectral differentiation)")
def grad_fft_field(f):
    """Return the three spatial partial derivatives of `f`, computed spectrally.

    The forward FFT of `f` is multiplied by 1j*KX, 1j*KY and 1j*KZ (the
    module-level wavenumber grids) and each product is inverse-transformed;
    only the real part is kept.

    Parameters
    ----------
    f : ndarray
        Field sampled on the same grid the wavenumber arrays were built for.

    Returns
    -------
    tuple of ndarray
        (df/dx, df/dy, df/dz), each an independent contiguous real array.
    """
    fhat = fftn(f)
    # `.real` on the complex IFFT output is only a strided view that pins the
    # full complex buffer (twice the bytes of the real data). Copying lets that
    # buffer be freed, which matters with nine N^3 gradient arrays kept around.
    fx = ifftn(1j * KX * fhat).real.copy()
    fy = ifftn(1j * KY * fhat).real.copy()
    fz = ifftn(1j * KZ * fhat).real.copy()
    del fhat; gc.collect()
    return fx, fy, fz

du_dx, du_dy, du_dz = grad_fft_field(u)
dv_dx, dv_dy, dv_dz = grad_fft_field(v)
dw_dx, dw_dy, dw_dz = grad_fft_field(w)
log("Gradients computed")

# --- 4) Downsample and compute invariants (memory safe) ---
log(f"Downsampling by factor DS={DS} for invariant computation")
inds = slice(0, N, DS)
coarse = (inds, inds, inds)   # every DS-th point along each of the three axes

# Velocity components on the coarse grid
u_s, v_s, w_s = u[coarse], v[coarse], w[coarse]

# Velocity-gradient components on the coarse grid
du_dx_s, du_dy_s, du_dz_s = du_dx[coarse], du_dy[coarse], du_dz[coarse]
dv_dx_s, dv_dy_s, dv_dz_s = dv_dx[coarse], dv_dy[coarse], dv_dz[coarse]
dw_dx_s, dw_dy_s, dw_dz_s = dw_dx[coarse], dw_dy[coarse], dw_dz[coarse]

# Vorticity (curl of velocity) and its magnitude
omega_x = dw_dy_s - dv_dz_s
omega_y = du_dz_s - dw_dx_s
omega_z = dv_dx_s - du_dy_s
vort_mag = np.sqrt(omega_x**2 + omega_y**2 + omega_z**2)

# Helicity density: velocity dotted with vorticity
helicity = u_s * omega_x + v_s * omega_y + w_s * omega_z

# Gradient tensor A[i, j] = d(u_i)/d(x_j), stacked to shape (3, 3, nx, ny, nz),
# split into its symmetric (S) and antisymmetric (Omega) parts.
A = np.stack([
    np.stack([du_dx_s, du_dy_s, du_dz_s]),
    np.stack([dv_dx_s, dv_dy_s, dv_dz_s]),
    np.stack([dw_dx_s, dw_dy_s, dw_dz_s]),
])
A_T = np.swapaxes(A, 0, 1)    # transpose of the 3x3 part at every grid point
S = 0.5 * (A + A_T)
Omega = 0.5 * (A - A_T)

# Sums of squared tensor entries and the Q-criterion built from them
tr_Omega2 = (Omega**2).sum(axis=(0, 1))
tr_S2 = (S**2).sum(axis=(0, 1))
Q = 0.5 * (tr_Omega2 - tr_S2)

# lambda2: second-largest eigenvalue of M = S^2 + Omega^2 at each point
log("Computing lambda2 on downsampled grid (this is the most expensive step on CPU)")
S2 = np.einsum('il...,lj...->ij...', S, S)
Omega2 = np.einsum('il...,lj...->ij...', Omega, Omega)
M = S2 + Omega2
# eigvalsh expects the matrix axes last: (3, 3, nx, ny, nz) -> (nx, ny, nz, 3, 3)
M = np.moveaxis(M, [0, 1], [-2, -1])
eigvals = np.linalg.eigvalsh(M)
ordered = np.sort(eigvals, axis=-1)
lambda2 = ordered[..., -2]
del M, eigvals, ordered; gc.collect()

log("Invariants ready")

# --- 5) Feature matrix and clustering (KMeans++) ---
# One row per coarse-grid point, one column per invariant.
features = np.column_stack([vort_mag.ravel(), Q.ravel(), lambda2.ravel(), helicity.ravel()])
# Standardise each column; the small offset guards against a zero std.
means = features.mean(axis=0)
stds = features.std(axis=0) + 1e-12
X = (features - means) / stds
n_points = X.shape[0]
log(f"Feature matrix shape: {X.shape}")

log(f"Running KMeans with k={K_CLUSTERS}")
t0 = time.time()
kmeans = KMeans(n_clusters=K_CLUSTERS, n_init=20, max_iter=500, random_state=SEED)
labels_flat = kmeans.fit_predict(X)
t1 = time.time()
log(f"KMeans finished in {t1-t0:.1f}s")

# Put the labels back on the coarse grid. Use the shape of the sampled field
# itself: slice(0, N, DS) yields ceil(N/DS) points per axis, which differs
# from N//DS (and makes a hard-coded reshape fail) when DS does not divide N.
labels_ds = labels_flat.reshape(vort_mag.shape)

# volumes and N_eff
counts = np.bincount(labels_flat, minlength=K_CLUSTERS)
volumes = counts / float(labels_flat.size)
# N_eff = number of clusters holding more than 0.5% of the sampled points
N_eff = int(np.sum(counts > 0.005 * labels_flat.size))
log(f"N_eff (cutoff 0.5%): {N_eff}")

# --- 6) Save outputs (CSV, NPZ, report) ---
# Per-cluster point counts and volume fractions
csv_path = os.path.join(OUTDIR, 'cluster_volumes_256_cpu.csv')
with open(csv_path, 'w') as f:
    f.write('cluster_id,count,volume_fraction\n')
    for cid, cnt, vol in zip(range(K_CLUSTERS), counts, volumes):
        f.write(f'{cid},{int(cnt)},{vol:.8f}\n')

# Run metadata is stored as a JSON string. Passing a dict directly makes NumPy
# save a pickled object array, which np.load refuses to read unless the caller
# opts in with allow_pickle=True. Read it back with:
#     json.loads(str(np.load(path)['meta']))
meta_json = json.dumps(dict(N=int(N), L=float(L), seed=int(SEED),
                            k_clusters=int(K_CLUSTERS), DS=int(DS)))
np.savez_compressed(os.path.join(OUTDIR, 'multiflux_256_cpu.npz'),
                    u=u, v=v, w=w,
                    vort_mag=vort_mag, Q=Q, lambda2=lambda2, helicity=helicity,
                    labels_ds=labels_ds,
                    meta=meta_json)

# Short human-readable run summary
report_path = os.path.join(OUTDIR, 'report.txt')
with open(report_path, 'w') as f:
    f.write('Multiflux HIT 256 run report\n')
    f.write(f'Grid: {N}^3; L={L}; seed={SEED}; k_clusters={K_CLUSTERS}\n')
    f.write(f'Mean kinetic energy density: {KE_mean:.6e}\n')
    f.write(f'N_eff (cutoff 0.50%): {N_eff}\n')
    f.write('Top 5 cluster volumes (%): ' + ', '.join(f'{100*v:.2f}' for v in sorted(volumes, reverse=True)[:5]) + '\n')

log(f"Saved CSV: {csv_path}, NPZ and report")

# --- 7) Visualization (approximate upsample of labels for display) ---
# Nearest-neighbour upsample: repeat every coarse label DS times along each
# axis, then crop to N in case the coarse grid overshoots.
labels_full_approx = np.repeat(np.repeat(np.repeat(labels_ds, DS, axis=0), DS, axis=1), DS, axis=2)
labels_full_approx = labels_full_approx[:N, :N, :N]
mid = N // 2

fig, axes = plt.subplots(2,2, figsize=(14,12))
# Each plane is indexed [first axis, second axis]. imshow draws rows
# vertically, so every plane is transposed to put its first axis on the
# horizontal. The XY panel previously skipped the transpose, unlike XZ and YZ.
slice_panels = [
    (axes[0,0], labels_full_approx[:, :, mid], 'Slice XY (approx)', 'x', 'y'),
    (axes[0,1], labels_full_approx[:, mid, :], 'Slice XZ (approx)', 'x', 'z'),
    (axes[1,0], labels_full_approx[mid, :, :], 'Slice YZ (approx)', 'y', 'z'),
]
for ax, plane, title, xlabel, ylabel in slice_panels:
    ax.imshow(plane.T, origin='lower', interpolation='nearest')
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
axes[1,1].bar(np.arange(K_CLUSTERS), volumes*100)
axes[1,1].set_title('Cluster volumes (%)')
axes[1,1].set_xlabel('cluster id')
axes[1,1].set_ylabel('volume (%)')

# \u2014 is an em dash; the escape keeps it intact if the file is re-encoded
# (the previous literal had been corrupted into mojibake).
fig.suptitle(f'Multiflux 256 CPU (DS={DS}) \u2014 N_eff={N_eff}')
fig.tight_layout(rect=[0,0.03,1,0.95])
fig_path = os.path.join(OUTDIR, 'multiflux_slices_and_hist_256_cpu.png')
# SAVE_PLOTS controls writing the PNG; the figure is always displayed.
# Previously the flag gated plt.show() while the file was written regardless.
if SAVE_PLOTS:
    fig.savefig(fig_path, dpi=200, bbox_inches='tight')
    log(f"Saved figure to {fig_path}")
plt.show()
plt.close(fig)

# '\n' (not '\\n') so a blank line is printed instead of a literal backslash-n.
print('\n=== SUMMARY ===')
print(f'Grid: {N}^3, Downsample DS={DS} => feature grid {(N//DS)}^3')
print(f'Mean KE density: {KE_mean:.6e}')
print(f'N_eff (cutoff 0.5%): {N_eff}')
print('Top 5 cluster volumes (%):', ', '.join(f'{100*v:.2f}' for v in sorted(volumes, reverse=True)[:5]))
print('Outputs saved to:', OUTDIR)


Environment ready
[23:39:59] Building k-grid for N=256
[23:40:03] Generating random fields and spectral transforms (sequential to limit peak memory)
[23:40:37] Applying spectral scaling
[23:40:50] Applying solenoidal projection
[23:42:57] IFFT to physical space
[23:43:03] Mean kinetic energy density: 7.298977e+17
[23:43:34] Saved velocity_256_cpu.npz
[23:43:34] Computing gradients (spectral differentiation)
[00:41:56] Gradients computed
[00:42:00] Downsampling by factor DS=4 for invariant computation
[00:42:02] Computing lambda2 on downsampled grid (this is the most expensive step on CPU)
[00:42:07] Invariants ready
[00:42:07] Feature matrix shape: (262144, 4)
[00:42:07] Running KMeans with k=12
[00:42:39] KMeans finished in 31.8s
[00:42:39] N_eff (cutoff 0.5%): 12
[00:44:50] Saved CSV: output_multiflux_256_cpu/cluster_volumes_256_cpu.csv, NPZ and report
