In [None]:
# Environment setup: run this cell before any other so that the packages loaded
# by later cells are resolved from the project in the current working directory.
import Pkg
Pkg.activate(".")  # Activates the Project.toml in current directory ("." is relative to the working directory)
Pkg.instantiate() # Installs all dependencies of the activated project
Pkg.status()       # Shows the activated environment and its packages

In [None]:
using Revise

In [None]:
# Report the SLURM / CUDA environment variables visible to this kernel, then
# check whether CUDA.jl can use a GPU and describe the active device.
println("=== GPU Verification ===")
let lookup = name -> get(ENV, name, "not set")
    # Each variable falls back to "not set" when absent from the environment.
    println("SLURM_JOB_ID: ", lookup("SLURM_JOB_ID"))
    println("SLURM_JOB_GPUS: ", lookup("SLURM_JOB_GPUS"))
    println("CUDA_VISIBLE_DEVICES: ", lookup("CUDA_VISIBLE_DEVICES"))
end

using CUDA
if !CUDA.functional()
    println("✗ CUDA not functional!")
else
    println("✓ CUDA functional")
    let gpu = CUDA.device()
        println("✓ GPU device: ", CUDA.name(gpu))
        # Integer division: the total is truncated to whole multiples of 1024^3 bytes.
        println("✓ Memory: ", div(CUDA.totalmem(gpu), 1024^3), " GB")
    end
end

In [None]:
# Select the ClimaComms device and context through environment variables.
# They are assigned before ClimaComms is imported, matching the required
# ordering of this cell.
for (key, value) in ("CLIMACOMMS_DEVICE" => "CUDA", "CLIMACOMMS_CONTEXT" => "SINGLETON")
    ENV[key] = value
end

import ClimaComms
ClimaComms.@import_required_backends
import Random
Random.seed!(1234)  # fixed seed for the global RNG
import ClimaAtmos as CA

# Build the configuration from the default config file; the path is relative
# to the current working directory.
config = CA.AtmosConfig(
    ["ClimaAtmos.jl/config/default_configs/default_config.yml"];
    job_id = "notebook-debugging",
)

simulation = CA.get_simulation(config)
integrator = simulation.integrator
# Independent copy of integrator.u, taken before benchmark_step! is first called.
Y₀ = deepcopy(integrator.u)

@info "Compiling benchmark_step!..."
# First call is made here so that compilation happens before any timing cell runs.
CA.benchmark_step!(integrator, Y₀);


In [None]:
import CUDA

# Time one call to benchmark_step! with CUDA's elapsed-time macro; the measured
# value is the result displayed for this cell.
# NOTE(review): the trailing `1` is presumably the number of steps — confirm
# against the ClimaAtmos definition of benchmark_step!.
CUDA.@elapsed CA.benchmark_step!(integrator, Y₀, 1)

In [None]:
import CUDA

# Smoke test: multiply two random 1000x1000 matrices on the GPU and report the
# size, type and device of the result.
if CUDA.functional()
    # Create arrays on GPU
    x_gpu = CUDA.randn(1000, 1000)
    y_gpu = CUDA.randn(1000, 1000)

    # Perform computation on GPU
    z_gpu = x_gpu * y_gpu

    # GPU work is launched asynchronously. Wait for it to finish before
    # reporting success, so that a failed computation raises here instead of
    # surfacing in a later, unrelated cell.
    CUDA.synchronize()

    println("GPU computation successful!")
    println("Result size: ", size(z_gpu))
    println("Result type: ", typeof(z_gpu))
    println("Device: ", CUDA.device(z_gpu))
else
    println("GPU not available - running on CPU")
end

In [None]:
# Print name, compute capability, total memory and free memory for every GPU
# that CUDA.jl can see. Does nothing when CUDA is not functional.
if CUDA.functional()
    # CUDA.available_memory() is called without a device argument, so each
    # device is made active in turn. Remember the device that was active on
    # entry and restore it afterwards (even if a query throws); otherwise the
    # last enumerated GPU would silently stay active for all later cells.
    let original_device = CUDA.device()
        try
            for (i, dev) in enumerate(CUDA.devices())
                CUDA.device!(dev)
                println("GPU $i:")
                println("  Name: ", CUDA.name(dev))
                println("  Compute capability: ", CUDA.capability(dev))
                println("  Total memory: ", CUDA.totalmem(dev) ÷ (1024^2), " MB")
                println("  Free memory: ", CUDA.available_memory() ÷ (1024^2), " MB")
            end
        finally
            CUDA.device!(original_device)
        end
    end
end