In [None]:
# Bootstrap the package environment for this notebook. The three calls below are
# order-dependent: activate first, then instantiate, then report.
import Pkg
# Make the Project.toml found in the current working directory the active environment.
Pkg.activate(".")  # Activates the Project.toml in current directory
# Install whatever the active environment declares as dependencies.
Pkg.instantiate() # Installs all dependencies
# Print the package list so the activated environment is visible in the cell output.
Pkg.status()       # Shows the activated environment

In [None]:
using Revise

In [None]:
# Request the CUDA backend through an environment variable of this Julia process.
# NOTE(review): presumably consulted by ClimaComms when the simulation picks its
# compute device, so it must be set before the cells that build the simulation —
# confirm against the ClimaComms documentation.
ENV["CLIMACOMMS_DEVICE"] = "CUDA"

In [None]:
# Report the scheduler- and GPU-related environment variables seen by this kernel.
# Variables that are absent from ENV are printed as "not set".
env_vars = [
    "SLURM_JOB_ID",
    "SLURM_JOB_GPUS",
    "CUDA_VISIBLE_DEVICES",
    "SLURM_GPUS_ON_NODE",
]
foreach(env_vars) do name
    println(name, ": ", get(ENV, name, "not set"))
end

In [None]:
import CUDA

# GPU smoke test: when CUDA is usable, multiply two random 1000×1000 matrices created
# through CUDA.randn and describe the result; otherwise just report that no GPU is
# available.
if !CUDA.functional()
    println("GPU not available - running on CPU")
else
    # Both operands are created through CUDA's own random generator.
    x_gpu = CUDA.randn(1000, 1000)
    y_gpu = CUDA.randn(1000, 1000)

    # Matrix product of the two GPU arrays.
    z_gpu = x_gpu * y_gpu

    println("GPU computation successful!")
    println("Result size: ", size(z_gpu))
    println("Result type: ", typeof(z_gpu))
    println("Device: ", CUDA.device(z_gpu))
end

In [None]:
# Print name, compute capability and memory figures for every CUDA device visible to
# this process. Nothing is printed when CUDA is not functional.
if CUDA.functional()
    # `i` is the 1-based position in the enumeration, used only as a display label.
    for (i, dev) in enumerate(CUDA.devices())
        # `CUDA.available_memory()` takes no device argument, so each device has to be
        # active while it is queried. The do-block form of `device!` switches only for
        # the duration of the block; the plain `CUDA.device!(dev)` call would leave the
        # last enumerated GPU active for every later cell in the notebook.
        CUDA.device!(dev) do
            println("GPU $i:")
            println("  Name: ", CUDA.name(dev))
            println("  Compute capability: ", CUDA.capability(dev))
            println("  Total memory: ", CUDA.totalmem(dev) ÷ (1024^2), " MB")
            println("  Free memory: ", CUDA.available_memory() ÷ (1024^2), " MB")
        end
    end
end

In [None]:
# Build a ClimaAtmos simulation from the default configuration file so that stepping
# can be profiled/benchmarked directly from the notebook.
import ClimaAtmos as CA

config = CA.AtmosConfig(
    ["ClimaAtmos.jl/config/default_configs/default_config.yml"];
    job_id = "notebook-debugging",
)
simulation = CA.get_simulation(config)
integrator = simulation.integrator;

# Independent copy of the integrator's state, handed to every benchmark call.
Y₀ = deepcopy(integrator.u);

@info "Compiling benchmark_step!..."
# Kernel-renaming switch (rename CUDA kernels based on the stack trace).
# NOTE(review): this definition returns `false`, and because it is unqualified it
# creates a function in the notebook's own namespace rather than adding a method to a
# package function (unless the name was explicitly imported elsewhere). If the intent
# is to turn renaming on, a package-qualified definition returning `true` is
# presumably needed — confirm against the package that owns this hook.
profile_rename_kernel_names() = false
# First call, made so that compilation happens here rather than in later calls.
CA.benchmark_step!(integrator, Y₀);

In [None]:
# Repeat the benchmark call with the same integrator and saved state Y₀. The setup
# cell already made this call once (marked there as the compile run), so this
# invocation is the one whose output is of interest.
CA.benchmark_step!(integrator, Y₀);