forked from JuliaAttic/CUDA.jl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- src/event.jl: functions create_event, destroy, record, elapsed_time - examples/performance.jl: example to check performance of kernel with CUDA event
- Loading branch information
1 parent
f486d21
commit ab1aed8
Showing
3 changed files
with
115 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# An example to measure GPU execution time with CUDA events | ||
|
||
using CUDA | ||
|
||
# array length
len = 1024 * 1024

# thread counts in thread block
block = 256

try
    # create context on device 0
    dev = CuDevice(0)
    ctx = create_context(dev)

    # load module from vadd.ptx
    md = CuModule("vadd.ptx")

    # get function vadd
    f = CuFunction(md, "vadd")

    # prepare arrays to compute
    a = round(rand(Float32, len) * 100)
    b = round(rand(Float32, len) * 100)

    # create array c on GPU
    gc = CuArray(Float32, len)

    # create events to check performance
    e1 = create_event()
    e2 = create_event()

    # start event; everything between record(e1) and record(e2) is timed:
    # the two uploads, the kernel launch and the download of the result
    record(e1)

    # load arrays to GPU
    ga = CuArray(a)
    gb = CuArray(b)

    # launch kernel with len / block thread blocks of `block` threads each
    launch(f, div(len, block), block, (ga, gb, gc))

    # fetch results from GPU
    c = to_host(gc)

    # end event
    record(e2)

    # elapsed time: milliseconds
    println("Elapsed time: $(elapsed_time(e1, e2)) ms")

    # destroy events
    destroy(e1)
    destroy(e2)

    # free GPU memory
    free(ga)
    free(gb)
    free(gc)

    # check result
    println("c == (a + b) ? $(c == (a + b))")

    # unload module
    unload(md)

    # destroy context
    destroy(ctx)

catch err
    if isa(err, CuDriverError)
        # driver errors are reported with their description instead of a stack trace
        println("$err: $(description(err))")
    else
        # rethrow() re-raises the current exception with its original backtrace;
        # throw(err) would restart the backtrace at this line
        rethrow()
    end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# CUDA event | ||
|
||
# Immutable wrapper around a CUDA event handle.
# The handle is stored as an untyped pointer and is passed as-is to the
# cuEvent* driver calls made by the functions in this file.
immutable CuEvent
    handle::Ptr{Void}    # raw event handle
end
|
||
# Event creation flags (the `flags` argument of create_event).

# Default event flag
const CU_EVENT_DEFAULT = 0x00

# Event uses blocking synchronization
const CU_EVENT_BLOCKING_SYNC = 0x01

# Event will not record timing data
const CU_EVENT_DISABLE_TIMING = 0x02

# Event is suitable for interprocess use. CU_EVENT_DISABLE_TIMING must be set
const CU_EVENT_INTERPROCESS = 0x04
|
||
# Create a CUDA event via cuEventCreate and wrap the resulting handle.
#
# flags: event creation flags, forwarded to the driver as a Cuint
#        (defaults to CU_EVENT_DEFAULT).
# Returns a CuEvent holding the handle written by the driver.
function create_event(flags::Integer = CU_EVENT_DEFAULT)
    # one-slot buffer that receives the new handle from the driver call
    out = Array(Ptr{Void}, 1)
    @cucall(:cuEventCreate,
            (Ptr{Ptr{Void}}, Cuint),
            out, flags)
    handle = out[1]
    return CuEvent(handle)
end
|
||
# Destroy a CUDA event by passing its handle to cuEventDestroy.
#
# event: the CuEvent to destroy.
function destroy(event::CuEvent)
    handle = event.handle
    @cucall(:cuEventDestroy,
            (Ptr{Void},),
            handle)
end
|
||
# Record `event` via cuEventRecord, passing a null stream handle.
#
# event: the CuEvent to record.
function record(event::CuEvent)
    # TODO: support stream
    # The second argument is the stream handle. Pass a real null pointer
    # (C_NULL) instead of the integer 0 so the call does not depend on an
    # implicit integer-to-pointer conversion.
    @cucall(:cuEventRecord,
            (Ptr{Void}, Ptr{Void}),
            event.handle, C_NULL)
end
|
||
# Return the time elapsed between two recorded events, as a Float32
# (the example script in this commit prints the value as milliseconds).
#
# start_event: event recorded at the beginning of the measured interval.
# end_event:   event recorded at the end of the measured interval.
function elapsed_time(start_event::CuEvent, end_event::CuEvent)
    # Events are recorded asynchronously. cuEventElapsedTime reports
    # CUDA_ERROR_NOT_READY if either event has not completed yet, so wait
    # for both before querying the elapsed time.
    @cucall(:cuEventSynchronize, (Ptr{Void},), start_event.handle)
    @cucall(:cuEventSynchronize, (Ptr{Void},), end_event.handle)

    # one-slot buffer that receives the result from the driver call
    elapsed = Array(Float32, 1)
    @cucall(:cuEventElapsedTime,
            (Ptr{Float32}, Ptr{Void}, Ptr{Void}),
            elapsed, start_event.handle, end_event.handle)
    return elapsed[1]
end