In [2]:
pip install torch-directml


Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement torch-directml (from versions: none)

[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip
ERROR: No matching distribution found for torch-directml


In [3]:
import torch

# Backend preference order: CUDA first, then DirectML, and finally plain CPU.
if not torch.cuda.is_available():
    try:
        # DirectML is only usable when the torch_directml package can be imported.
        import torch_directml
        device, backend = torch_directml.device(), "DirectML"
    except ImportError:
        device, backend = torch.device("cpu"), "CPU"
else:
    device, backend = torch.device("cuda"), "CUDA"

print(f"Using backend: {backend}, device: {device}")


Using backend: CPU, device: cpu


In [None]:
import os
import time
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
from numpy.linalg import norm
import torch
from memory_profiler import memory_usage

# --- Pick the compute device: CUDA when present, otherwise CPU ---
# NOTE: this rebinds `device`, so a DirectML device chosen by the earlier
# backend-selection cell is not used from here on.
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("No GPU available, falling back to CPU")
else:
    device = torch.device("cuda")
    gpu_name = torch.cuda.get_device_name(0)
    print(f"GPU available: {gpu_name}")

# --- Input locations: both `.nc` files sit in the same project folder ---
base_path = r'G:\My Drive\NUS\NUS Y6S1\ME5311\PROJECT_2420_ME5311'
slp_path, t2m_path = (os.path.join(base_path, fname) for fname in ('slp.nc', 't2m.nc'))

# --- Open the two datasets ---
ds_slp = xr.open_dataset(slp_path)
ds_t2m = xr.open_dataset(t2m_path)

# --- Pull the fields and the SLP coordinates out as NumPy arrays ---
slp = ds_slp['msl'].values
t2m = ds_t2m['t2m'].values
timestamps, lats, longs = (
    ds_slp[coord].values for coord in ('time', 'latitude', 'longitude')
)

# --- Build the SLP snapshot matrix and remove the per-row mean ---
n_time, n_lat, n_lon = slp.shape
# Flatten every time step into one vector, then transpose so the matrix is
# (n_space, n_time): one row per grid point, one column per time step.
A_slp = np.transpose(slp.reshape((n_time, n_lat * n_lon)))
# Mean over time for each row, kept as a column so it broadcasts against A_slp.
A_mean_slp = np.mean(A_slp, axis=1, keepdims=True)
A_centered_slp = np.subtract(A_slp, A_mean_slp)

In [None]:
# --- Define parameters ---
# NOTE(review): `k` is not read by any of the cells visible here; the rank
# cut-off applied later is `k_truncate`. Confirm whether `k` is still needed.
k = None  # Full SVD (will be truncated after computation if needed)

# --- Define GPU-accelerated SVD function with monitoring ---
def perform_gpu_svd_with_monitoring(A):
    """Run a thin SVD of ``A`` on ``device`` while tracking runtime and memory.

    The factors are published as the module-level NumPy arrays ``U_slp``,
    ``S_slp`` and ``VT_slp`` so that ``A ~= U_slp @ diag(S_slp) @ VT_slp``
    (up to float32 precision).

    Parameters
    ----------
    A : array-like
        2-D matrix to decompose. It is converted to a float32 tensor and moved
        to the module-level ``device`` before the decomposition.

    Returns
    -------
    tuple
        ``(elapsed, mem_usage)``: seconds spent in the monitored call
        (profiler overhead included) and whatever
        ``memory_usage(..., max_usage=True)`` reports for the task.
    """
    print(f"Performing GPU-accelerated SVD on SLP data using {device}...")

    def gpu_svd_task():
        global U_slp, S_slp, VT_slp

        # Convert the NumPy array to a float32 tensor on the target device.
        A_tensor = torch.tensor(A, dtype=torch.float32).to(device)

        # torch.linalg.svd returns (U, S, Vh): the third factor is ALREADY the
        # (conjugate-)transposed V. Transposing it again would store V under the
        # name VT_slp and corrupt every reconstruction built from it.
        U_tensor, S_tensor, Vh_tensor = torch.linalg.svd(A_tensor, full_matrices=False)

        # Move results back to host memory as NumPy arrays.
        U_slp = U_tensor.cpu().numpy()
        S_slp = S_tensor.cpu().numpy()
        VT_slp = Vh_tensor.cpu().numpy()

        # Drop the tensor references first; the CUDA caching allocator can only
        # release blocks that are no longer referenced.
        del A_tensor, U_tensor, S_tensor, Vh_tensor
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # NOTE(review): memory_profiler presumably samples host-process memory only,
    # so device memory would not be part of this figure - confirm.
    start = time.perf_counter()  # monotonic clock, suited to interval timing
    mem_usage = memory_usage(gpu_svd_task, max_usage=True)
    elapsed = time.perf_counter() - start

    return elapsed, mem_usage

# --- Decompose the centered SLP matrix, recording runtime and peak memory ---
# The call also publishes U_slp, S_slp and VT_slp as module-level arrays.
elapsed_slp, peak_mem_slp = perform_gpu_svd_with_monitoring(A_centered_slp)

# --- Shapes of the input matrix and of the three factors (informational) ---
print(f"A shape: {A_slp.shape}")
print(f"U shape: {U_slp.shape}, S shape: {S_slp.shape}, VT shape: {VT_slp.shape}")
print("GPU-accelerated SVD completed")

In [None]:
# --- Keep only the leading modes for the analysis below ---
k_truncate = 500  # how many leading singular triplets to retain
U_trunc, S_trunc, VT_trunc = (
    U_slp[:, :k_truncate],   # first k_truncate columns of U
    S_slp[:k_truncate],      # first k_truncate singular values
    VT_slp[:k_truncate],     # first k_truncate rows of VT
)

# --- Accuracy (Reconstruction Error) ---
def calculate_reconstruction_error(U, S, VT, A_original, A_mean):
    """Relative Frobenius-norm error of the reconstruction ``U diag(S) VT + A_mean``.

    Parameters
    ----------
    U : ndarray, shape (m, k)
        Left singular vectors (possibly truncated to k columns).
    S : ndarray, shape (k,)
        Singular values matching the columns of ``U``.
    VT : ndarray, shape (k, n)
        Right singular vectors, already transposed (one vector per row).
    A_original : ndarray, shape (m, n)
        Uncentered reference matrix the reconstruction is compared against.
    A_mean : ndarray
        Mean that was removed before the SVD; it is added back and must
        broadcast against an (m, n) array.

    Returns
    -------
    tuple
        ``(error, A_reconstructed)`` with
        ``error = ||A_original - A_reconstructed||_F / ||A_original||_F``.
    """
    # Scale the columns of U by the singular values through broadcasting. This
    # equals U @ np.diag(S) but avoids allocating a dense (k, k) matrix and the
    # extra matrix product.
    A_reconstructed = (U * S) @ VT + A_mean

    # numpy.linalg.norm defaults to the Frobenius norm for 2-D input.
    error = norm(A_original - A_reconstructed) / norm(A_original)
    return error, A_reconstructed

# --- Noise Robustness Test ---
def test_noise_robustness(A_centered, A_original, A_mean, noise_scale=0.01):
    """Test GPU SVD robustness against Gaussian noise"""
    np.random.seed(0)  # For reproducibility
    noise = np.random.normal(scale=noise_scale, size=A_centered.shape)
    A_noisy = A_centered + noise
    
    # Convert to PyTorch tensor and move to GPU
    A_noisy_tensor = torch.tensor(A_noisy, dtype=torch.float32).to(device)
    
    # Perform SVD on noisy data
    U_noisy_tensor, S_noisy_tensor, V_noisy_tensor = torch.linalg.svd(A_noisy_tensor, full_matrices=False)
    
    # Move results back to CPU and convert to numpy
    U_noisy = U_noisy_tensor.cpu().numpy()
    S_noisy = S_noisy_tensor.cpu().numpy()
    VT_noisy = V_noisy_tensor.transpose(0, 1).cpu().numpy()
    
    # Truncate if needed
    U_noisy = U_noisy[:, :k_truncate]
    S_noisy = S_noisy[:k_truncate]
    VT_noisy = VT_noisy[:k_truncate, :]
    
    # Calculate reconstruction error with noise
    S_noisy_diag = np.diag(S_noisy)
    A_reconstructed_noisy = U_noisy @ S_noisy_diag @ VT_noisy + A_mean
    
    error = norm(A_original - A_reconstructed_noisy) / norm(A_original)
    
    # Clear GPU cache
    torch.cuda.empty_cache() if torch.cuda.is_available() else None
    
    return error

# Error of the truncated factorization, measured against the uncentered data
reconstruction_error, A_reconstructed = calculate_reconstruction_error(
    U=U_trunc, S=S_trunc, VT=VT_trunc, A_original=A_slp, A_mean=A_mean_slp
)

# Same metric after perturbing the centered data with Gaussian noise
noise_error = test_noise_robustness(
    A_centered=A_centered_slp, A_original=A_slp, A_mean=A_mean_slp
)

# --- Summary of the SLP run: device, cost, and accuracy figures ---
print("\n===== GPU-accelerated SVD Results for SLP =====")
print(f"Device used: {device}")
if torch.cuda.is_available():
    # The GPU name is only available (and only printed) when CUDA is present.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
print(f"Runtime: {elapsed_slp:.2f} seconds")
print(f"Peak memory usage: {peak_mem_slp:.2f} MiB")
print(f"Reconstruction error with {k_truncate} modes (Frobenius norm): {reconstruction_error:.6e}")
print(f"Noise robustness (error with Gaussian noise): {noise_error:.6e}")

# --- Cumulative energy ---
# S_slp comes from a float32 decomposition. np.cumsum adds sequentially, so a
# float32 accumulator silently drops small tail contributions once the running
# sum is large. Accumulate in float64 so the energy fractions stay accurate and
# the total is consistent with the last cumulative value.
total_energy = np.sum(S_slp**2, dtype=np.float64)
cumulative_energy = np.cumsum(S_slp**2, dtype=np.float64) / total_energy
# First (1-based) mode count whose cumulative fraction reaches each threshold.
energy_90 = np.where(cumulative_energy >= 0.9)[0][0] + 1
energy_95 = np.where(cumulative_energy >= 0.95)[0][0] + 1

print(f"Number of modes for 90% energy: {energy_90}")
print(f"Number of modes for 95% energy: {energy_95}")

# --- Optional: persist the headline numbers for later comparison ---
results = dict(
    method="GPU-accelerated SVD",
    device=str(device),
    gpu_name=torch.cuda.get_device_name(0) if torch.cuda.is_available() else "None",
    runtime=elapsed_slp,
    memory_usage=peak_mem_slp,
    reconstruction_error=float(reconstruction_error),
    noise_robustness=float(noise_error),
    modes_90pct_energy=int(energy_90),
    modes_95pct_energy=int(energy_95),
    top_singular_values=S_slp[:10].tolist(),  # first 10 singular values only
)

# Write the summary as indented JSON into the current working directory
import json
with open("gpu_svd_results.json", "w") as f:
    json.dump(results, f, indent=4)

# --- Plot singular value decay ---
plt.figure(figsize=(10, 6))
# Size the x-axis from the data actually available: S_slp[:100] is shorter than
# 100 when the decomposition has fewer modes, and a fixed range(1, 101) would
# then fail with an x/y length mismatch.
plt.semilogy(range(1, min(100, len(S_slp)) + 1), S_slp[:100], 'o-')
plt.title('Singular Value Decay (Top 100) - GPU-accelerated SVD')
plt.xlabel('Index')
plt.ylabel('Singular Value (log scale)')
plt.grid(True)
# Save before show(): the figure is written to the current working directory.
plt.savefig('gpu_svd_singular_values.png', dpi=300)
plt.show()

# --- Plot cumulative energy ---
plt.figure(figsize=(10, 6))
# x runs over 1..n where n = min(1000, number of modes), matching the length of
# cumulative_energy[:1000]. The previous bound min(1001, len) produced one point
# too few whenever fewer than 1000 modes exist, causing a length mismatch.
plt.plot(range(1, min(1000, len(cumulative_energy)) + 1), cumulative_energy[:1000])
plt.axhline(y=0.9, color='r', linestyle='--', label='90% Energy')
plt.axhline(y=0.95, color='g', linestyle='--', label='95% Energy')
plt.title('Cumulative Energy vs. Number of Modes - GPU-accelerated SVD')
plt.xlabel('Number of Modes')
plt.ylabel('Cumulative Energy Fraction')
plt.legend()
plt.grid(True)
# Save before show(): the figure is written to the current working directory.
plt.savefig('gpu_svd_cumulative_energy.png', dpi=300)
plt.show()