using CUDA.CUDNN

@test has_cudnn()
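
# `testf(f, xs...)` is a helper from the shared test utilities (an assumption
# about where it is defined): it runs `f` on the host arrays and on `CuArray`
# copies of them, then checks that the two results agree.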
@testset "NNlib" begin
using NNlib
using NNlib: ∇conv_data, ∇conv_filter,
maxpool, meanpool, ∇maxpool, ∇meanpool,
softmax, ∇softmax, logsoftmax, ∇logsoftmax
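
    # 10×10 input with 3 channels and batch size 1, a 2×2 kernel mapping
    # 3 channels to 4, and the matching 9×9×4×1 output used by the gradient tests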
    a, b, c = rand(Float64, 10, 10, 3, 1), rand(Float64, 2, 2, 3, 4), rand(Float64, 9, 9, 4, 1)
    da, db, dc = CuArray(a), CuArray(b), CuArray(c)
    cdims = DenseConvDims(a, b)
    @test NNlib.conv(a, b, cdims) ≈ collect(NNlib.conv(da, db, cdims))
    @test ∇conv_data(c, b, cdims) ≈ collect(∇conv_data(dc, db, cdims))
    @test ∇conv_filter(a, c, cdims) ≈ collect(∇conv_filter(da, dc, cdims))

    # Test for agreement between CPU NNlib and CuDNN versions, across a variety of kwargs
    for num_spatial_dims in (1, 2, 3)
        # Initialize data we'll run our tests over
        C_in = 3
        C_out = 4
        batch_size = 1
        x = rand(Float64, fill(8, num_spatial_dims)..., C_in, batch_size)
        w = rand(Float64, fill(2, num_spatial_dims)..., C_in, C_out)
        b = rand(Float64, fill(1, num_spatial_dims)..., C_in, C_out)
        options = (Dict(), Dict(:dilation => 2), Dict(:flipkernel => true), Dict(:stride => 2))
        algos = (1, 0, 1, 1)
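
        # Each entry of `algos` is a raw cuDNN algorithm index, paired one-to-one
        # with the kwarg set in `options`; it is forced in the in-place
        # conv!/∇conv_data!/∇conv_filter! calls below to exercise explicit
        # algorithm selection.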
        for (opts, algo) in zip(options, algos)
            cdims = DenseConvDims(x, w; opts...)
            y = NNlib.conv(x, w, cdims)

            # Test that basic convolution is equivalent across GPU/CPU
            @test testf((x, w) -> NNlib.conv(x, w, cdims), x, w)
            @test testf((y, w) -> ∇conv_data(y, w, cdims), y, w)
            @test testf((x, y) -> ∇conv_filter(x, y, cdims), x, y)

            # Test that we can use an alternative algorithm without dying
            @test_nowarn NNlib.conv!(cu(y), cu(x), cu(w), cdims; algo=algo)
            @test_nowarn NNlib.∇conv_data!(cu(x), cu(y), cu(w), cdims; algo=algo)
            @test_nowarn NNlib.∇conv_filter!(cu(w), cu(x), cu(y), cdims; algo=algo)
        end

        # Test that pooling is equivalent across GPU/CPU
        pdims = PoolDims(x, 2)
        y = maxpool(x, pdims)
        dy = ones(size(y))
        @test testf(x -> maxpool(x, pdims), x)
        @test testf((dy, y, x) -> ∇maxpool(dy, y, x, pdims), dy, y, x)
        y = meanpool(x, pdims)
        @test testf(x -> meanpool(x, pdims), x)
        @test testf((dy, y, x) -> ∇meanpool(dy, y, x, pdims), dy, y, x)

        # CPU implementation of ∇conv_bias!
        db = zeros(Float64, 1, 1, 3, 1)
        function CUDNN.∇conv_bias!(db, y)
            db .= sum(y, dims=(1:(ndims(y)-2)))
            return db
        end
        #@test testf(CUDNN.∇conv_bias!, db, y)
    end
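
    # softmax/logsoftmax and their gradients, over both matrix and vector inputs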
    for dims in [(5,5), (5,)]
        @test testf(softmax, rand(Float64, dims))
        @test testf(∇softmax, rand(Float64, dims), rand(Float64, dims))
        @test testf(logsoftmax, rand(Float64, dims))
        @test testf(∇logsoftmax, rand(Float64, dims), rand(Float64, dims))
    end
end
@testset "Activations and Other Ops" begin
@test testf(CUDNN.cudnnAddTensor, cu(rand(Float64, 10, 10, 3, 1)), cu(rand(Float64, 10, 10, 3, 1)))
@test testf(CUDNN.cudnnActivationForward, cu(rand(Float64, 10, 10, 3, 1)), cu(rand(Float64, 10, 10, 3, 1)))
@test testf(CUDNN.cudnnActivationBackward, cu(rand(Float64, 10, 10, 3, 1)), cu(rand(Float64, 10, 10, 3, 1)), cu(rand(Float64, 10, 10, 3, 1)), cu(rand(Float64, 10, 10, 3, 1)))
# activations defined in src/nnlib.jl
ACTIVATION_FUNCTIONS = [σ, logσ, hardσ, hardtanh, relu, leakyrelu, relu6, rrelu,
elu, gelu, celu, swish, lisht, selu, trelu, softplus,
softsign, logcosh, mish, tanhshrink, softshrink];
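
    # rrelu is skipped below: it draws random slopes, so an elementwise CPU/GPU
    # comparison would fail even when both implementations are correct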
    for dims in ((5,5), (5,))
        for f in filter(x -> x != rrelu, ACTIVATION_FUNCTIONS)
            @test testf(x -> f.(x), rand(Float64, dims))
        end
    end

    # softplus does not give `Inf` for large arguments
    x = cu([1000.])
    @test all(softplus.(x) .== x)
end
@testset "Batchnorm" begin
v = rand(2) |> cu
m = rand(2, 5) |> cu
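
    # Smoke test in both inference and training mode. Reading off the call below,
    # the positional arguments appear to be scale, bias, input, running mean,
    # running variance, and momentum (an assumption about the wrapper's
    # Flux-style signature, not confirmed here).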
    for training in (false, true)
        CUDNN.batchnorm(v, v, m, v, v, 1.0; training=training)
    end
end