forked from JuliaGPU/CUDA.jl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utilities.jl
102 lines (91 loc) · 3.31 KB
/
utilities.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""
@sync [blocking=true] ex
Run expression `ex` and synchronize the GPU afterwards. By default, this is a CPU-friendly
synchronization, i.e. it performs a blocking synchronization without increasing CPU load
As such, this operation is preferred over implicit synchronization (e.g. when performing a
memory copy) for high-performance applications.
It is also useful for timing code that executes asynchronously.
"""
macro sync(ex...)
# destructure the `@sync` expression
code = ex[end]
kwargs = ex[1:end-1]
# decode keyword arguments
blocking = true
for kwarg in kwargs
Meta.isexpr(kwarg, :(=)) || error("Invalid keyword argument $kwarg")
key, val = kwarg.args
if key == :blocking
blocking = val
else
error("Unknown keyword argument $kwarg")
end
end
flags = EVENT_DISABLE_TIMING
if blocking
flags |= EVENT_BLOCKING_SYNC
end
quote
local e = CuEvent($flags)
local ret = $(esc(code))
record(e)
synchronize(e)
ret
end
end
"""
    versioninfo(io::IO=stdout)

Print a human-readable summary of the CUDA environment to `io`: toolkit and driver
versions, discovered libraries, the compilation toolchain, any `JULIA_CUDA*` environment
variables, and the available devices with their compute capability and memory status.
"""
function versioninfo(io::IO=stdout)
    println(io, "CUDA toolkit $(toolkit_version()), $(toolkit_origin()) installation")
    println(io, "CUDA driver $(release())")
    # the NVIDIA driver version is only queryable through NVML
    if has_nvml()
        println(io, "NVIDIA driver $(NVML.driver_version())")
    end
    println(io)

    # these libraries are assumed to always be available; the ones below may be missing
    println(io, "Libraries: ")
    for lib in (:CUBLAS, :CURAND, :CUFFT, :CUSOLVER, :CUSPARSE)
        mod = getfield(CUDA, lib)
        println(io, "- $lib: ", mod.version())
    end
    println(io, "- CUPTI: ", has_cupti() ? CUPTI.version() : "missing")
    println(io, "- NVML: ", has_nvml() ? NVML.version() : "missing")
    println(io, "- CUDNN: ", has_cudnn() ? "$(CUDNN.version()) (for CUDA $(CUDNN.cuda_version()))" : "missing")
    println(io, "- CUTENSOR: ", has_cutensor() ? "$(CUTENSOR.version()) (for CUDA $(CUTENSOR.cuda_version()))" : "missing")
    println(io)

    println(io, "Toolchain:")
    println(io, "- Julia: $VERSION")
    println(io, "- LLVM: $(LLVM.version())")
    println(io, "- PTX ISA support: $(join(map(ver->"$(ver.major).$(ver.minor)", __ptx_support[]), ", "))")
    println(io, "- Device support: $(join(map(ver->"sm_$(ver.major)$(ver.minor)", __target_support[]), ", "))")
    println(io)

    # only report environment variables that configure this package
    env = filter(var->startswith(var, "JULIA_CUDA"), keys(ENV))
    if !isempty(env)
        println(io, "Environment:")
        for var in env
            println(io, "- $var: $(ENV[var])")
        end
        println(io)
    end

    devs = devices()
    if isempty(devs)
        println(io, "No CUDA-capable devices.")
    elseif length(devs) == 1
        println(io, "1 device:")
    else
        println(io, length(devs), " devices:")
    end
    for (i, dev) in enumerate(devs)
        if has_nvml()
            # query through NVML when possible to avoid having to create a device context
            dev′ = NVML.Device(uuid(dev))
            str = NVML.name(dev′)
            cap = NVML.compute_capability(dev′)
            mem = NVML.memory_info(dev′)
        else
            str = name(dev)
            cap = capability(dev)
            mem = device!(dev) do
                # this requires a device context, so we prefer NVML
                (free=available_memory(), total=total_memory())
            end
        end
        # devices are listed 0-based, matching CUDA's device numbering
        println(io, "  $(i-1): $str (sm_$(cap.major)$(cap.minor), $(Base.format_bytes(mem.free)) / $(Base.format_bytes(mem.total)) available)")
    end
end