diff --git a/.github/workflows/gpu_ci_trigger.yml b/.github/workflows/gpu_ci_trigger.yml index 9ed33d7..4418074 100644 --- a/.github/workflows/gpu_ci_trigger.yml +++ b/.github/workflows/gpu_ci_trigger.yml @@ -1,3 +1,24 @@ +# SETUP INSTRUCTIONS: +# ------------------ +# This workflow synchronizes the code to GitLab via SSH to trigger GPU-enabled CI. +# +# 1. GENERATE SSH KEY PAIR (on your local machine): +# ssh-keygen -t ed25519 -f ~/.ssh/gitlab_sync_key -N "" -C "github-to-gitlab-sync" +# +# 2. CONFIGURE GITLAB (The Target): +# - Go to GitLab project > Settings > Repository > Deploy keys. +# - Add the content of '~/.ssh/gitlab_sync_key.pub'. +# - IMPORTANT: Check "Allow write access to this repository". +# +# 3. CONFIGURE GITHUB (The Source): +# - Go to GitHub repo > Settings > Secrets and variables > Actions. +# - Add new Repository Secrets: +# - Name: GITLAB_SSH_PRIVATE_KEY +# Value: Paste the entire content of '~/.ssh/gitlab_sync_key'. +# - Name: GITLAB_TOKEN +# Value: Your GitLab Personal Access Token (with 'api' and 'read_repository' scopes). +# + name: Sync to GitLab and Run GPU CI on: @@ -22,6 +43,7 @@ jobs: ssh-private-key: ${{ secrets.GITLAB_SSH_PRIVATE_KEY }} - name: Push to GitLab via SSH & Provide Link + id: push run: | # 1. Setup SSH known hosts mkdir -p ~/.ssh @@ -29,10 +51,13 @@ jobs: # 2. Determine target branch if [ "${{ github.event_name }}" == "pull_request" ]; then - TARGET_BRANCH="pr-${{ github.event.number }}" + TARGET_BRANCH="gpu-test-pr-${{ github.event.number }}" else - TARGET_BRANCH="${{ github.ref_name }}" + SOURCE_REF="${{ github.ref_name }}" + SAFE_REF="${SOURCE_REF//\//-}" + TARGET_BRANCH="gpu-test-${SAFE_REF}" fi + echo "TARGET_BRANCH=$TARGET_BRANCH" >> $GITHUB_ENV # 3. Add GitLab SSH remote git remote add gitlab git@gitlab.mpcdf.mpg.de:maxlin/cunumpy.git @@ -41,9 +66,16 @@ jobs: git push -f gitlab HEAD:refs/heads/$TARGET_BRANCH # 5. 
Provide the direct link - # We construct the URL manually since the push triggers the pipeline automatically PIPELINE_URL="https://gitlab.mpcdf.mpg.de/maxlin/cunumpy/-/pipelines?ref=$TARGET_BRANCH" echo "::notice::GitLab GPU CI Pipeline started automatically via Push!" echo "::notice::View Pipeline: $PIPELINE_URL" + - name: Wait for GitLab Pipeline + uses: docker://gitlab/glab:latest + env: + GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }} + GITLAB_HOST: gitlab.mpcdf.mpg.de + with: + entrypoint: glab + args: ci status --live --branch ${{ env.TARGET_BRANCH }} --repo maxlin/cunumpy diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b76c09b..79d4881 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -15,6 +15,7 @@ gpu_tests: before_script: - module load python-waterboa/2025.06 - module load nvhpcsdk/26 + - module load fftw-serial/3.3.10 script: - echo "--- CUDA Sanity Check ---" - nvidia-smi @@ -31,10 +32,15 @@ gpu_tests: # The MPCDF image likely has a specific python environment. # We install our dependencies into the user directory or a virtualenv. - python3 -m pip install --user cupy-cuda12x + - python3 -m pip install --user nvidia-cublas-cu12 nvidia-cufft-cu12 nvidia-curand-cu12 nvidia-cusolver-cu12 nvidia-cusparse-cu12 - python3 -m pip install --user -e . # Add the user bin to PATH for pytest - export PATH="$HOME/.local/bin:$PATH" + + # Try to find libcublas and other libraries in the HPC environment + - export LD_LIBRARY_PATH=$(find /mpcdf/soft /opt/nvidia -name libcublas.so.12 -exec dirname {} \; 2>/dev/null | head -n 1):$LD_LIBRARY_PATH + - export ARRAY_BACKEND=cupy - - python3 -m pytest tests/unit/ + - pytest -xvs . diff --git a/src/cunumpy/__init__.py b/src/cunumpy/__init__.py index c6592bd..383f32a 100644 --- a/src/cunumpy/__init__.py +++ b/src/cunumpy/__init__.py @@ -1,6 +1,7 @@ # cunumpy/__init__.py from . 
_CUPY_AVAILABLE_CACHE = None


def cupy_available() -> bool:
    """Report whether CuPy can be imported and reports an available GPU.

    The probe runs at most once per process: its outcome is stored in the
    module-level ``_CUPY_AVAILABLE_CACHE`` and returned directly on every
    later call.

    Returns:
        ``True`` when ``import cupy`` succeeds and ``cupy.is_available()`` is
        truthy; ``False`` when the import or the availability probe raises.
    """
    global _CUPY_AVAILABLE_CACHE
    if _CUPY_AVAILABLE_CACHE is None:
        try:
            import cupy as cp

            # Coerce so the cache always holds a plain bool, matching the
            # declared return type.
            _CUPY_AVAILABLE_CACHE = bool(cp.is_available())
        except Exception:
            # Deliberately broad: any failure while importing or probing CuPy
            # means "not usable". ImportError is a subclass of Exception, so
            # the previous (ImportError, Exception) tuple was redundant.
            _CUPY_AVAILABLE_CACHE = False
    return _CUPY_AVAILABLE_CACHE
@pytest.mark.skipif(
    not xp.cupy_available(), reason="CuPy/GPU not available or not functional"
)
def test_benchmark_matmul():
    """Benchmark matrix multiplication to show CuPy performance gain.

    Times a single ``size x size`` float32 matrix product on the NumPy backend
    and on the CuPy backend (after one warm-up product), prints both timings
    and the speedup, and asserts that the CuPy run was the faster one.
    """
    size = 2000

    # --- Benchmark NumPy ---
    with xp.use_backend("numpy"):
        a_np = xp.random.rand(size, size).astype(xp.float32)
        b_np = xp.random.rand(size, size).astype(xp.float32)

        start_np = time.perf_counter()
        # Only the elapsed time matters; the product itself is discarded.
        _ = a_np @ b_np
        # No sync needed for NumPy as it is synchronous
        end_np = time.perf_counter()
        t_np = end_np - start_np

    # --- Benchmark CuPy ---
    with xp.use_backend("cupy"):
        a_cp = xp.random.rand(size, size).astype(xp.float32)
        b_cp = xp.random.rand(size, size).astype(xp.float32)

        # Warm up
        _ = a_cp @ b_cp
        xp.synchronize()

        start_cp = time.perf_counter()
        _ = a_cp @ b_cp
        xp.synchronize()  # CRITICAL for benchmarking GPU
        end_cp = time.perf_counter()
        t_cp = end_cp - start_cp

    print(f"\n[Benchmark] Size: {size}x{size}")
    print(f"NumPy time: {t_np:.4f}s")
    print(f"CuPy time: {t_cp:.4f}s")
    print(f"Speedup: {t_np/t_cp:.2f}x")

    # On a real GPU (A100/A30), CuPy should be significantly faster.
    # The check is intentionally lenient: it only requires CuPy to beat NumPy,
    # with no minimum speedup factor, so it can pass on various hardware.
    assert t_cp < t_np, f"CuPy ({t_cp:.4f}s) was not faster than NumPy ({t_np:.4f}s)"
test_benchmark_fft(): + """Benchmark FFT performance.""" + size = 2**22 # ~4 million elements + + with xp.use_backend("numpy"): + data_np = xp.random.rand(size).astype(xp.complex64) + start = time.perf_counter() + _ = xp.fft.fft(data_np) + t_np = time.perf_counter() - start + + with xp.use_backend("cupy"): + data_cp = xp.random.rand(size).astype(xp.complex64) + # Warm up + _ = xp.fft.fft(data_cp) + xp.synchronize() + + start = time.perf_counter() + _ = xp.fft.fft(data_cp) + xp.synchronize() + t_cp = time.perf_counter() - start + + print(f"\n[Benchmark] FFT Size: {size}") + print(f"NumPy time: {t_np:.4f}s") + print(f"CuPy time: {t_cp:.4f}s") + print(f"Speedup: {t_np/t_cp:.2f}x") + assert t_cp < t_np diff --git a/tests/unit/test_cupy.py b/tests/unit/test_cupy.py index 935a27a..a8e804d 100644 --- a/tests/unit/test_cupy.py +++ b/tests/unit/test_cupy.py @@ -5,10 +5,10 @@ def test_to_cupy_available(): - try: - import cupy as cp - except ImportError: - pytest.skip("CuPy not installed") + if not xp.cupy_available(): + pytest.skip("CuPy not installed or not functional") + + import cupy as cp with xp.use_backend("cupy"): arr = np.array([1, 2, 3]) @@ -17,12 +17,8 @@ def test_to_cupy_available(): def test_to_cupy_not_available(): - try: - import cupy - - pytest.skip("CuPy is installed, cannot test missing cupy error") - except ImportError: - pass + if xp.cupy_available(): + pytest.skip("CuPy is installed and functional, cannot test missing cupy error") with xp.use_backend("cupy"): arr = np.array([1, 2, 3]) @@ -42,10 +38,10 @@ def test_synchronize(): def test_xp_array_cupy(): - try: - import cupy as cp - except ImportError: - pytest.skip("CuPy not installed") + if not xp.cupy_available(): + pytest.skip("CuPy not installed or not functional") + + import cupy as cp with xp.use_backend("cupy"): arr = xp.array([1, 2]) diff --git a/tests/unit/test_features.py b/tests/unit/test_features.py new file mode 100644 index 0000000..1e0aced --- /dev/null +++ b/tests/unit/test_features.py @@ 
@pytest.mark.parametrize("backend", ["numpy", "cupy"])
def test_matrix_multiplication(backend):
    """Check the ``@`` operator and ``linalg.norm`` on the selected backend.

    The CuPy parametrization is skipped when ``xp.cupy_available()`` is false.
    """
    if backend == "cupy" and not xp.cupy_available():
        pytest.skip("CuPy not installed or not functional")

    with xp.use_backend(backend):
        # Test the basic @ operator
        a = xp.array([[1, 2], [3, 4]], dtype=float)
        b = xp.array([[5, 6], [7, 8]], dtype=float)
        c = a @ b

        expected = np.array([[19, 22], [43, 50]])
        # Both operands are host (NumPy) arrays at this point, so compare them
        # with NumPy itself rather than through the active backend, which may
        # be CuPy. This matches how the other feature tests compare results.
        assert np.array_equal(xp.to_numpy(c), expected)

        # Test linalg.norm
        norm = xp.linalg.norm(a)
        assert np.isclose(float(norm), np.linalg.norm([[1, 2], [3, 4]]))
@pytest.mark.parametrize("backend", ["numpy", "cupy"])
def test_fft_parity(backend):
    """Round-trip a two-tone signal through ``fft`` and ``ifft`` on *backend*.

    The real part of ``ifft(fft(signal))`` must match the input signal within
    ``np.allclose`` tolerance. The CuPy case is skipped when CuPy is unusable.
    """
    if backend == "cupy":
        if not xp.cupy_available():
            pytest.skip("CuPy not installed or not functional")

    with xp.use_backend(backend):
        # 128 samples on [0, 1] carrying a 5-cycle tone plus a half-amplitude
        # 20-cycle tone.
        samples = xp.linspace(0, 1, 128)
        signal = xp.sin(2 * xp.pi * 5 * samples) + 0.5 * xp.sin(
            2 * xp.pi * 20 * samples
        )

        spectrum = xp.fft.fft(signal)
        recovered = xp.fft.ifft(spectrum)

        # Inverse transform of the forward transform reproduces the input.
        recovered_host = xp.to_numpy(recovered.real)
        signal_host = xp.to_numpy(signal)
        assert np.allclose(recovered_host, signal_host)
@pytest.mark.parametrize("backend", ["numpy", "cupy"])
def test_random_generation(backend):
    """Draw standard-normal samples on *backend* and sanity-check them.

    Verifies the requested shape is honoured and that the sample mean of the
    draws lies within 0.5 of zero. No seed is set, so this is a loose
    statistical check rather than a reproducibility test.
    """
    if backend == "cupy":
        if not xp.cupy_available():
            pytest.skip("CuPy not installed or not functional")

    with xp.use_backend(backend):
        draws = xp.random.normal(0, 1, size=(100, 100))
        assert draws.shape == (100, 100)

        # Basic statistical sanity: the mean of the draws stays near zero.
        sample_mean = xp.mean(draws)
        assert xp.abs(sample_mean) < 0.5
def test_mixed_backend_errors():
    """Verify that mixing backends in operations raises errors (standard NumPy/CuPy behavior).

    Adding a NumPy array to a CuPy array directly must raise. Converting the
    NumPy operand with ``xp.to_cunumpy`` while the CuPy backend is active must
    make the same addition succeed and yield a GPU array.
    """
    if not xp.cupy_available():
        pytest.skip("CuPy not installed or not functional")

    a_cpu = np.array([1, 2, 3])
    a_gpu = xp.to_cupy(a_cpu)

    # This should fail because you can't add CPU and GPU arrays directly
    with pytest.raises(Exception):
        _ = a_cpu + a_gpu

    # But to_cunumpy should fix it. It converts to the *currently active*
    # backend, so it must run inside the CuPy context; calling it before
    # entering the context only works when the ambient backend is already
    # CuPy (e.g. ARRAY_BACKEND=cupy in CI).
    # NOTE(review): assumes use_backend("cupy") switches the backend that
    # to_cunumpy consults -- confirm against xp.use_backend.
    with xp.use_backend("cupy"):
        a_gpu_fixed = xp.to_cunumpy(a_cpu)
        res = a_gpu + a_gpu_fixed
        assert xp.is_gpu(res)