Skip to content

Commit 317d48b

Browse files
authored
Unrolled build for #147315
Rollup merge of #147315 - ZuseZ4:fix-ad-batching-test, r=jieyouxu bless autodiff batching test This PR blesses a broken test and unblocks running Rust in the Enzyme CI: EnzymeAD/Enzyme#2430 Enzyme is the plugin used by our std::autodiff and (future) std::batching modules, neither of which is built by default. In the near future we also hope to enable std::autodiff in the Rust CI. This test is the only one to combine two features, automatic differentiation and batching/vectorization. This combination is even more experimental than either feature on its own. I have a WIP branch in which I enable more vectorization/batching and as part of that I'll think more about how to write those tests in a robust way (and likely change the interface). Until that lands, I don't care too much about what specific IR we generate here; it's just nice to track changes. r? compiler
2 parents 99ca0ae + 12cfad9 commit 317d48b

File tree

1 file changed

+42
-32
lines changed

1 file changed

+42
-32
lines changed

tests/codegen-llvm/autodiff/batched.rs

Lines changed: 42 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//@ compile-flags: -Zautodiff=Enable,NoTT -C opt-level=3 -Clto=fat
1+
//@ compile-flags: -Zautodiff=Enable,NoTT,NoPostopt -C opt-level=3 -Clto=fat
22
//@ no-prefer-dynamic
33
//@ needs-enzyme
44
//
@@ -23,7 +23,7 @@ fn square(x: &f32) -> f32 {
2323
}
2424

2525
// d_square2
26-
// CHECK: define internal fastcc [4 x float] @fwddiffe4square(float %x.0.val, [4 x ptr] %"x'")
26+
// CHECK: define internal [4 x float] @fwddiffe4square(ptr noalias noundef readonly align 4 captures(none) dereferenceable(4) %x, [4 x ptr] %"x'")
2727
// CHECK-NEXT: start:
2828
// CHECK-NEXT: %0 = extractvalue [4 x ptr] %"x'", 0
2929
// CHECK-NEXT: %"_2'ipl" = load float, ptr %0, align 4
@@ -33,23 +33,28 @@ fn square(x: &f32) -> f32 {
3333
// CHECK-NEXT: %"_2'ipl2" = load float, ptr %2, align 4
3434
// CHECK-NEXT: %3 = extractvalue [4 x ptr] %"x'", 3
3535
// CHECK-NEXT: %"_2'ipl3" = load float, ptr %3, align 4
36-
// CHECK-NEXT: %4 = fmul float %"_2'ipl", 2.000000e+00
37-
// CHECK-NEXT: %5 = fmul fast float %4, %x.0.val
38-
// CHECK-NEXT: %6 = insertvalue [4 x float] undef, float %5, 0
39-
// CHECK-NEXT: %7 = fmul float %"_2'ipl1", 2.000000e+00
40-
// CHECK-NEXT: %8 = fmul fast float %7, %x.0.val
41-
// CHECK-NEXT: %9 = insertvalue [4 x float] %6, float %8, 1
42-
// CHECK-NEXT: %10 = fmul float %"_2'ipl2", 2.000000e+00
43-
// CHECK-NEXT: %11 = fmul fast float %10, %x.0.val
44-
// CHECK-NEXT: %12 = insertvalue [4 x float] %9, float %11, 2
45-
// CHECK-NEXT: %13 = fmul float %"_2'ipl3", 2.000000e+00
46-
// CHECK-NEXT: %14 = fmul fast float %13, %x.0.val
47-
// CHECK-NEXT: %15 = insertvalue [4 x float] %12, float %14, 3
48-
// CHECK-NEXT: ret [4 x float] %15
36+
// CHECK-NEXT: %_2 = load float, ptr %x, align 4
37+
// CHECK-NEXT: %4 = fmul fast float %"_2'ipl", %_2
38+
// CHECK-NEXT: %5 = fmul fast float %"_2'ipl1", %_2
39+
// CHECK-NEXT: %6 = fmul fast float %"_2'ipl2", %_2
40+
// CHECK-NEXT: %7 = fmul fast float %"_2'ipl3", %_2
41+
// CHECK-NEXT: %8 = fmul fast float %"_2'ipl", %_2
42+
// CHECK-NEXT: %9 = fmul fast float %"_2'ipl1", %_2
43+
// CHECK-NEXT: %10 = fmul fast float %"_2'ipl2", %_2
44+
// CHECK-NEXT: %11 = fmul fast float %"_2'ipl3", %_2
45+
// CHECK-NEXT: %12 = fadd fast float %4, %8
46+
// CHECK-NEXT: %13 = insertvalue [4 x float] undef, float %12, 0
47+
// CHECK-NEXT: %14 = fadd fast float %5, %9
48+
// CHECK-NEXT: %15 = insertvalue [4 x float] %13, float %14, 1
49+
// CHECK-NEXT: %16 = fadd fast float %6, %10
50+
// CHECK-NEXT: %17 = insertvalue [4 x float] %15, float %16, 2
51+
// CHECK-NEXT: %18 = fadd fast float %7, %11
52+
// CHECK-NEXT: %19 = insertvalue [4 x float] %17, float %18, 3
53+
// CHECK-NEXT: ret [4 x float] %19
4954
// CHECK-NEXT: }
5055

5156
// d_square3, the extra float is the original return value (x * x)
52-
// CHECK: define internal fastcc { float, [4 x float] } @fwddiffe4square.1(float %x.0.val, [4 x ptr] %"x'")
57+
// CHECK: define internal { float, [4 x float] } @fwddiffe4square.1(ptr noalias noundef readonly align 4 captures(none) dereferenceable(4) %x, [4 x ptr] %"x'")
5358
// CHECK-NEXT: start:
5459
// CHECK-NEXT: %0 = extractvalue [4 x ptr] %"x'", 0
5560
// CHECK-NEXT: %"_2'ipl" = load float, ptr %0, align 4
@@ -59,22 +64,27 @@ fn square(x: &f32) -> f32 {
5964
// CHECK-NEXT: %"_2'ipl2" = load float, ptr %2, align 4
6065
// CHECK-NEXT: %3 = extractvalue [4 x ptr] %"x'", 3
6166
// CHECK-NEXT: %"_2'ipl3" = load float, ptr %3, align 4
62-
// CHECK-NEXT: %_0 = fmul float %x.0.val, %x.0.val
63-
// CHECK-NEXT: %4 = fmul float %"_2'ipl", 2.000000e+00
64-
// CHECK-NEXT: %5 = fmul fast float %4, %x.0.val
65-
// CHECK-NEXT: %6 = insertvalue [4 x float] undef, float %5, 0
66-
// CHECK-NEXT: %7 = fmul float %"_2'ipl1", 2.000000e+00
67-
// CHECK-NEXT: %8 = fmul fast float %7, %x.0.val
68-
// CHECK-NEXT: %9 = insertvalue [4 x float] %6, float %8, 1
69-
// CHECK-NEXT: %10 = fmul float %"_2'ipl2", 2.000000e+00
70-
// CHECK-NEXT: %11 = fmul fast float %10, %x.0.val
71-
// CHECK-NEXT: %12 = insertvalue [4 x float] %9, float %11, 2
72-
// CHECK-NEXT: %13 = fmul float %"_2'ipl3", 2.000000e+00
73-
// CHECK-NEXT: %14 = fmul fast float %13, %x.0.val
74-
// CHECK-NEXT: %15 = insertvalue [4 x float] %12, float %14, 3
75-
// CHECK-NEXT: %16 = insertvalue { float, [4 x float] } undef, float %_0, 0
76-
// CHECK-NEXT: %17 = insertvalue { float, [4 x float] } %16, [4 x float] %15, 1
77-
// CHECK-NEXT: ret { float, [4 x float] } %17
67+
// CHECK-NEXT: %_2 = load float, ptr %x, align 4
68+
// CHECK-NEXT: %_0 = fmul float %_2, %_2
69+
// CHECK-NEXT: %4 = fmul fast float %"_2'ipl", %_2
70+
// CHECK-NEXT: %5 = fmul fast float %"_2'ipl1", %_2
71+
// CHECK-NEXT: %6 = fmul fast float %"_2'ipl2", %_2
72+
// CHECK-NEXT: %7 = fmul fast float %"_2'ipl3", %_2
73+
// CHECK-NEXT: %8 = fmul fast float %"_2'ipl", %_2
74+
// CHECK-NEXT: %9 = fmul fast float %"_2'ipl1", %_2
75+
// CHECK-NEXT: %10 = fmul fast float %"_2'ipl2", %_2
76+
// CHECK-NEXT: %11 = fmul fast float %"_2'ipl3", %_2
77+
// CHECK-NEXT: %12 = fadd fast float %4, %8
78+
// CHECK-NEXT: %13 = insertvalue [4 x float] undef, float %12, 0
79+
// CHECK-NEXT: %14 = fadd fast float %5, %9
80+
// CHECK-NEXT: %15 = insertvalue [4 x float] %13, float %14, 1
81+
// CHECK-NEXT: %16 = fadd fast float %6, %10
82+
// CHECK-NEXT: %17 = insertvalue [4 x float] %15, float %16, 2
83+
// CHECK-NEXT: %18 = fadd fast float %7, %11
84+
// CHECK-NEXT: %19 = insertvalue [4 x float] %17, float %18, 3
85+
// CHECK-NEXT: %20 = insertvalue { float, [4 x float] } undef, float %_0, 0
86+
// CHECK-NEXT: %21 = insertvalue { float, [4 x float] } %20, [4 x float] %19, 1
87+
// CHECK-NEXT: ret { float, [4 x float] } %21
7888
// CHECK-NEXT: }
7989

8090
fn main() {

0 commit comments

Comments
 (0)