# benchmark suite execution and codespeed submission

using CUDA
using BenchmarkTools
using StableRNGs
rng = StableRNG(123)

# we only submit results when running on the master branch
real_run = get(ENV, "CODESPEED_BRANCH", nothing) == "master"
if real_run
    # to find untuned benchmarks
    BenchmarkTools.DEFAULT_PARAMETERS.evals = 0
end

# convenience macro to create a benchmark that requires synchronizing the GPU
macro async_benchmarkable(ex...)
    quote
        @benchmarkable CUDA.@sync $(ex...)
    end
end

# before anything else, run latency benchmarks. these spawn subprocesses, so we don't want
# to do so after regular benchmarks have caused the memory allocator to reserve memory.
@info "Running latency benchmarks"
latency_results = include("latency.jl")

SUITE = BenchmarkGroup()

# NOTE: don't use spaces in benchmark names (tobami/codespeed#256)
include("kernel.jl")
include("array.jl")

if real_run
    @info "Preparing main benchmarks"
    warmup(SUITE; verbose=false)
    tune!(SUITE)

    # reclaim memory that might have been used by the tuning process
    GC.gc(true)
    CUDA.reclaim()
end

# benchmark groups that aren't part of the suite
addgroup!(SUITE, "integration")

@info "Running main benchmarks"
results = run(SUITE, verbose=true)

# integration tests (that do nasty things, so need to be run last)
@info "Running integration benchmarks"
integration_results = BenchmarkGroup()
integration_results["volumerhs"] = include("volumerhs.jl")
integration_results["byval"] = include("byval.jl")
integration_results["cudadevrt"] = include("cudadevrt.jl")

results["latency"] = latency_results
results["integration"] = integration_results

println(results)


## comparison

# write out the results
BenchmarkTools.save(joinpath(@__DIR__, "results.json"), results)

# compare against previous results
# TODO: store these results so that we can compare when benchmarking PRs
reference_path = joinpath(@__DIR__, "reference.json")
if ispath(reference_path)
    reference = BenchmarkTools.load(reference_path)[1]
    comparison = judge(minimum(results), minimum(reference))

    println("Improvements:")
    println(improvements(comparison))
    println("Regressions:")
    println(regressions(comparison))
end
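
# Sketch only, addressing the TODO above under the assumption that the reference
# measurements would come from a previous run of this script on master; it is not
# wired up here.
#
#     BenchmarkTools.save(reference_path, results)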


## submission

using JSON, HTTP

if real_run
    @info "Submitting to Codespeed..."

    basedata = Dict(
        "branch" => ENV["CODESPEED_BRANCH"],
        "commitid" => ENV["CODESPEED_COMMIT"],
        "project" => ENV["CODESPEED_PROJECT"],
        "environment" => ENV["CODESPEED_ENVIRONMENT"],
        "executable" => ENV["CODESPEED_EXECUTABLE"]
    )

    # convert nested groups of benchmarks into flat dictionaries of results
    flat_results = []
    function flatten(results, prefix="")
        for (key, value) in results
            if value isa BenchmarkGroup
                flatten(value, "$prefix$key/")
            else
                @assert value isa BenchmarkTools.Trial

                # codespeed reports maxima, but those are often very noisy.
                # get rid of measurements that unnecessarily skew the distribution.
                rmskew!(value)

                push!(flat_results,
                      Dict(basedata...,
                           "benchmark" => "$prefix$key",
                           "result_value" => median(value).time / 1e9,
                           "min" => minimum(value).time / 1e9,
                           "max" => maximum(value).time / 1e9))
            end
        end
    end
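
    # Each flattened entry merges `basedata` with the slash-separated benchmark name
    # and its timings converted from nanoseconds to seconds, roughly like this
    # (the benchmark name and values shown are hypothetical):
    #
    #     Dict("branch" => "master", ..., "benchmark" => "kernel/launch",
    #          "result_value" => 1.2e-5, "min" => 1.1e-5, "max" => 1.4e-5)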

    flatten(results)

    HTTP.post("$(ENV["CODESPEED_SERVER"])/result/add/json/",
              ["Content-Type" => "application/x-www-form-urlencoded"],
              HTTP.URIs.escapeuri(Dict("json" => JSON.json(flat_results))))
end