In [1]:
# --- Monkey‑patch mpi4py so no real MPI runtime is required --------------
import sys, types

# Create a fake mpi4py module
fake_mpi = types.ModuleType("mpi4py")
# Provide an MPI attribute so `from mpi4py import MPI` works
fake_mpi.MPI = None

# Register it in sys.modules
sys.modules["mpi4py"] = fake_mpi

# Also register the submodule path (in case code does import mpi4py.MPI)
sys.modules["mpi4py.MPI"] = None


In [3]:
import sys, subprocess

# Install missing I/O backends
subprocess.check_call([sys.executable, "-m", "pip", "install", "h5py", "netcdf4"])


0

In [4]:
import os, sys
# Adjust the path to point to your local clone
sys.path.insert(0, r"G:\My Drive\NUS\NUS Y6S1\ME5311\PROJECT_2420_ME5311\PyParSVD")

# Now both serial and parallel classes will load without errors
from pyparsvd.parsvd_serial   import ParSVD_Serial
from pyparsvd.parsvd_parallel import ParSVD_Parallel

print("Serial class:", ParSVD_Serial)
print("Parallel class:", ParSVD_Parallel)


Serial class: <class 'pyparsvd.parsvd_serial.ParSVD_Serial'>
Parallel class: <class 'pyparsvd.parsvd_parallel.ParSVD_Parallel'>


In [5]:
# --- Imports & optional installation -----------------------------------------
import os
import time
import json
import platform
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from numpy.linalg import norm
from memory_profiler import memory_usage

# --- File paths --------------------------------------------------------------
# Adjust `base_path` to your local or VANDA storage location.
base_path = r'G:\My Drive\NUS\NUS Y6S1\ME5311\PROJECT_2420_ME5311'              # <‑‑ change as required
slp_path  = os.path.join(base_path, 'slp.nc')       # sea‑level pressure

ds_slp    = xr.open_dataset(slp_path)
slp       = ds_slp['msl'].values                    # float32, shape (time, lat, lon)

# --- Reshape and centre ------------------------------------------------------
# Convert to (n_space, n_time) where n_space = lat*lon
n_time, n_lat, n_lon = slp.shape
A_slp     = slp.reshape(n_time, -1).T               # (n_space, n_time)
A_mean    = A_slp.mean(axis=1, keepdims=True)
A_center  = A_slp - A_mean       

In [None]:
# Simple PyParSVD Implementation for Your Dataset

# 1. Install PyParSVD (uncomment if not already installed)
# !pip install pyparsvd==0.0.4

import os
import numpy as np
import xarray as xr
import pyparsvd
import matplotlib.pyplot as plt

# --- Configuration ---
base_path = r'G:\My Drive\NUS\NUS Y6S1\ME5311\PROJECT_2420_ME5311' # <-- Update this to your data folder
slp_file = 'slp.nc'               # Ensure this file is in base_path
k = 50                            # Number of SVD modes to compute

# --- Load & Preprocess Data ---
ds = xr.open_dataset(os.path.join(base_path, slp_file))
A = ds['msl'].values              # Shape: (time, lat, lon)
n_time, n_lat, n_lon = A.shape
A_mat = A.reshape(n_time, -1).T   # Reshape to (space, time)

# Mean-subtract (recommended)
mean_vec = A_mat.mean(axis=1, keepdims=True)
A_centered = A_mat - mean_vec

# --- Compute SVD ---
svd_handler = pyparsvd.SVD(k=k, mean_subtract=False)  # mean_subtract already done
U, S, VT = svd_handler.compute(A_centered)

# --- Results ---
print(f"Computed top {k} modes:")
print("U shape:", U.shape)
print("S shape:", S.shape)
print("VT shape:", VT.shape)

# --- Plot Singular Value Spectrum ---
plt.figure()
plt.plot(np.arange(1, k+1), S, marker='o')
plt.title('Singular Values')
plt.xlabel('Mode Number')
plt.ylabel('Singular Value')
plt.grid(True)
plt.show()


AttributeError: module 'pyparsvd' has no attribute 'SVD'

In [None]:
# --- Accuracy (Reconstruction Error) ---
def calculate_reconstruction_error(U, S, VT, A_original, A_mean):
    """Calculate reconstruction error using Frobenius norm"""
    # Create diagonal S matrix for matrix multiplication
    S_diag = np.diag(S)
    
    # Reconstruct the original matrix
    A_reconstructed = U @ S_diag @ VT + A_mean
    
    # Calculate relative error
    error = norm(A_original - A_reconstructed) / norm(A_original)
    return error, A_reconstructed

# --- Noise Robustness Test ---
def test_noise_robustness(A_centered, A_original, A_mean, k, mpi_mode=False, noise_scale=0.01):
    """Test Parallel SVD robustness against Gaussian noise"""
    np.random.seed(0)  # For reproducibility
    noise = np.random.normal(scale=noise_scale, size=A_centered.shape)
    A_noisy = A_centered + noise
    
    # Create new PyParSVD handler for noisy data
    svd_handler_noisy = pyparsvd.SVD(
        k=k,
        mean_subtract=False,
        mpi_enabled=mpi_mode
    )
    
    # Fit on noisy data
    svd_handler_noisy.fit(A_noisy)
    
    # Extract components
    U_noisy = svd_handler_noisy.u
    S_noisy = svd_handler_noisy.s
    VT_noisy = svd_handler_noisy.v.T
    
    # Calculate reconstruction error with noise
    S_noisy_diag = np.diag(S_noisy)
    A_reconstructed_noisy = U_noisy @ S_noisy_diag @ VT_noisy + A_mean
    
    error = norm(A_original - A_reconstructed_noisy) / norm(A_original)
    return error

# Calculate reconstruction error
reconstruction_error, A_reconstructed = calculate_reconstruction_error(
    U_slp, S_slp, VT_slp, A_slp, A_mean_slp
)

# Test noise robustness
noise_error = test_noise_robustness(
    A_centered_slp, A_slp, A_mean_slp, k, mpi_mode
)

# --- Report results ---
print("\n===== Parallel SVD Results for SLP =====")
print(f"Number of components: k = {k}")
print(f"MPI mode: {mpi_mode}")
print(f"Runtime: {elapsed_slp:.2f} seconds")
print(f"Peak memory usage: {peak_mem_slp:.2f} MiB")
print(f"Reconstruction error (Frobenius norm): {reconstruction_error:.6e}")
print(f"Noise robustness (error with Gaussian noise): {noise_error:.6e}")

# --- Cumulative energy ---
total_energy = np.sum(S_slp**2)
cumulative_energy = np.cumsum(S_slp**2) / total_energy
print(f"Energy captured by {k} components: {cumulative_energy[-1]:.6f} ({cumulative_energy[-1]*100:.2f}%)")

# --- Optional: Save results to file for later comparison ---
results = {
    "method": "Parallel SVD (PyParSVD)",
    "k_value": k,
    "mpi_mode": mpi_mode,
    "runtime": elapsed_slp,
    "memory_usage": peak_mem_slp,
    "reconstruction_error": float(reconstruction_error),
    "noise_robustness": float(noise_error),
    "energy_captured": float(cumulative_energy[-1]),
    "top_singular_values": S_slp[:10].tolist()  # Save first 10 singular values
}

# Save as JSON (optional)
import json
with open("parallel_svd_results.json", "w") as f:
    json.dump(results, f, indent=4)

# --- Plot singular value decay ---
plt.figure(figsize=(10, 6))
plt.semilogy(range(1, len(S_slp) + 1), S_slp, 'o-')
plt.title(f'Singular Value Decay (k={k}) - Parallel SVD')
plt.xlabel('Index')
plt.ylabel('Singular Value (log scale)')
plt.grid(True)
plt.savefig('parallel_svd_singular_values.png', dpi=300)
plt.show()

# --- Plot cumulative energy ---
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(S_slp) + 1), cumulative_energy, 'o-')
plt.title('Cumulative Energy vs. Number of Modes - Parallel SVD')
plt.xlabel('Number of Modes')
plt.ylabel('Cumulative Energy Fraction')
plt.grid(True)
plt.savefig('parallel_svd_cumulative_energy.png', dpi=300)
plt.show()

# --- Optional: Visualize modes ---
if k >= 3:
    # Visualize spatial modes
    plt.figure(figsize=(15, 5))
    for i in range(3):  # First 3 modes
        plt.subplot(1, 3, i+1)
        mode = U_slp[:, i].reshape(n_lat, n_lon)
        plt.imshow(mode, cmap='RdBu_r')
        plt.colorbar()
        plt.title(f'Spatial Mode {i+1}')
    plt.tight_layout()
    plt.savefig('parallel_svd_spatial_modes.png', dpi=300)
    plt.show()
    
    # Visualize temporal modes
    plt.figure(figsize=(15, 5))
    for i in range(3):  # First 3 modes
        plt.subplot(1, 3, i+1)
        plt.plot(timestamps, VT_slp[i])
        plt.title(f'Temporal Mode {i+1}')
        plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('parallel_svd_temporal_modes.png', dpi=300)
    plt.show()