154 changes: 77 additions & 77 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll

Large diffs are not rendered by default.

518 changes: 259 additions & 259 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll

Large diffs are not rendered by default.
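
Editor's note (not part of the patch): every hunk below applies the same mechanical rename, dropping the "experimental." component from the reduction intrinsic names and, for the floating-point add/mul variants, the "v2." infix as well. A minimal before/after sketch of the pattern, built from declarations that appear in the tests below:

; before
declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
; after
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)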

58 changes: 29 additions & 29 deletions llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
@@ -1,28 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

declare i1 @llvm.experimental.vector.reduce.add.v1i1(<1 x i1> %a)
declare i8 @llvm.experimental.vector.reduce.add.v1i8(<1 x i8> %a)
declare i16 @llvm.experimental.vector.reduce.add.v1i16(<1 x i16> %a)
declare i24 @llvm.experimental.vector.reduce.add.v1i24(<1 x i24> %a)
declare i32 @llvm.experimental.vector.reduce.add.v1i32(<1 x i32> %a)
declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> %a)
declare i128 @llvm.experimental.vector.reduce.add.v1i128(<1 x i128> %a)

declare i8 @llvm.experimental.vector.reduce.add.v3i8(<3 x i8> %a)
declare i8 @llvm.experimental.vector.reduce.add.v9i8(<9 x i8> %a)
declare i32 @llvm.experimental.vector.reduce.add.v3i32(<3 x i32> %a)
declare i1 @llvm.experimental.vector.reduce.add.v4i1(<4 x i1> %a)
declare i24 @llvm.experimental.vector.reduce.add.v4i24(<4 x i24> %a)
declare i128 @llvm.experimental.vector.reduce.add.v2i128(<2 x i128> %a)
declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a)
declare i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %a)
declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a)
declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a)
declare i24 @llvm.vector.reduce.add.v1i24(<1 x i24> %a)
declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a)
declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)
declare i128 @llvm.vector.reduce.add.v1i128(<1 x i128> %a)

declare i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %a)
declare i8 @llvm.vector.reduce.add.v9i8(<9 x i8> %a)
declare i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %a)
declare i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %a)
declare i24 @llvm.vector.reduce.add.v4i24(<4 x i24> %a)
declare i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)

define i1 @test_v1i1(<1 x i1> %a) nounwind {
; CHECK-LABEL: test_v1i1:
; CHECK: // %bb.0:
; CHECK-NEXT: and w0, w0, #0x1
; CHECK-NEXT: ret
%b = call i1 @llvm.experimental.vector.reduce.add.v1i1(<1 x i1> %a)
%b = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %a)
ret i1 %b
}

@@ -32,7 +32,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret
%b = call i8 @llvm.experimental.vector.reduce.add.v1i8(<1 x i8> %a)
%b = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a)
ret i8 %b
}

@@ -42,15 +42,15 @@ define i16 @test_v1i16(<1 x i16> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret
%b = call i16 @llvm.experimental.vector.reduce.add.v1i16(<1 x i16> %a)
%b = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a)
ret i16 %b
}

define i24 @test_v1i24(<1 x i24> %a) nounwind {
; CHECK-LABEL: test_v1i24:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call i24 @llvm.experimental.vector.reduce.add.v1i24(<1 x i24> %a)
%b = call i24 @llvm.vector.reduce.add.v1i24(<1 x i24> %a)
ret i24 %b
}

@@ -60,7 +60,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i32 @llvm.experimental.vector.reduce.add.v1i32(<1 x i32> %a)
%b = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a)
ret i32 %b
}

@@ -70,15 +70,15 @@ define i64 @test_v1i64(<1 x i64> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%b = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> %a)
%b = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)
ret i64 %b
}

define i128 @test_v1i128(<1 x i128> %a) nounwind {
; CHECK-LABEL: test_v1i128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call i128 @llvm.experimental.vector.reduce.add.v1i128(<1 x i128> %a)
%b = call i128 @llvm.vector.reduce.add.v1i128(<1 x i128> %a)
ret i128 %b
}

@@ -92,7 +92,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-NEXT: addv h0, v0.4h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i8 @llvm.experimental.vector.reduce.add.v3i8(<3 x i8> %a)
%b = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %a)
ret i8 %b
}

@@ -109,7 +109,7 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
; CHECK-NEXT: addv b0, v0.16b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i8 @llvm.experimental.vector.reduce.add.v9i8(<9 x i8> %a)
%b = call i8 @llvm.vector.reduce.add.v9i8(<9 x i8> %a)
ret i8 %b
}

@@ -120,7 +120,7 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i32 @llvm.experimental.vector.reduce.add.v3i32(<3 x i32> %a)
%b = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %a)
ret i32 %b
}

@@ -131,7 +131,7 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%b = call i1 @llvm.experimental.vector.reduce.add.v4i1(<4 x i1> %a)
%b = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %a)
ret i1 %b
}

@@ -141,7 +141,7 @@ define i24 @test_v4i24(<4 x i24> %a) nounwind {
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i24 @llvm.experimental.vector.reduce.add.v4i24(<4 x i24> %a)
%b = call i24 @llvm.vector.reduce.add.v4i24(<4 x i24> %a)
ret i24 %b
}

@@ -151,7 +151,7 @@ define i128 @test_v2i128(<2 x i128> %a) nounwind {
; CHECK-NEXT: adds x0, x0, x2
; CHECK-NEXT: adcs x1, x1, x3
; CHECK-NEXT: ret
%b = call i128 @llvm.experimental.vector.reduce.add.v2i128(<2 x i128> %a)
%b = call i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a)
ret i128 %b
}

@@ -164,6 +164,6 @@ define i32 @test_v16i32(<16 x i32> %a) nounwind {
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> %a)
%b = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
ret i32 %b
}
58 changes: 29 additions & 29 deletions llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -1,28 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
declare i8 @llvm.experimental.vector.reduce.and.v1i8(<1 x i8> %a)
declare i16 @llvm.experimental.vector.reduce.and.v1i16(<1 x i16> %a)
declare i24 @llvm.experimental.vector.reduce.and.v1i24(<1 x i24> %a)
declare i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> %a)
declare i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %a)
declare i128 @llvm.experimental.vector.reduce.and.v1i128(<1 x i128> %a)

declare i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> %a)
declare i8 @llvm.experimental.vector.reduce.and.v9i8(<9 x i8> %a)
declare i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> %a)
declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
declare i24 @llvm.experimental.vector.reduce.and.v4i24(<4 x i24> %a)
declare i128 @llvm.experimental.vector.reduce.and.v2i128(<2 x i128> %a)
declare i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a)
declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
declare i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
declare i16 @llvm.vector.reduce.and.v1i16(<1 x i16> %a)
declare i24 @llvm.vector.reduce.and.v1i24(<1 x i24> %a)
declare i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %a)
declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)
declare i128 @llvm.vector.reduce.and.v1i128(<1 x i128> %a)

declare i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
declare i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a)
declare i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
declare i24 @llvm.vector.reduce.and.v4i24(<4 x i24> %a)
declare i128 @llvm.vector.reduce.and.v2i128(<2 x i128> %a)
declare i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a)

define i1 @test_v1i1(<1 x i1> %a) nounwind {
; CHECK-LABEL: test_v1i1:
; CHECK: // %bb.0:
; CHECK-NEXT: and w0, w0, #0x1
; CHECK-NEXT: ret
%b = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
%b = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
ret i1 %b
}

@@ -32,7 +32,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret
%b = call i8 @llvm.experimental.vector.reduce.and.v1i8(<1 x i8> %a)
%b = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
ret i8 %b
}

@@ -42,15 +42,15 @@ define i16 @test_v1i16(<1 x i16> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret
%b = call i16 @llvm.experimental.vector.reduce.and.v1i16(<1 x i16> %a)
%b = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> %a)
ret i16 %b
}

define i24 @test_v1i24(<1 x i24> %a) nounwind {
; CHECK-LABEL: test_v1i24:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call i24 @llvm.experimental.vector.reduce.and.v1i24(<1 x i24> %a)
%b = call i24 @llvm.vector.reduce.and.v1i24(<1 x i24> %a)
ret i24 %b
}

@@ -60,7 +60,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i32 @llvm.experimental.vector.reduce.and.v1i32(<1 x i32> %a)
%b = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %a)
ret i32 %b
}

@@ -70,15 +70,15 @@ define i64 @test_v1i64(<1 x i64> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%b = call i64 @llvm.experimental.vector.reduce.and.v1i64(<1 x i64> %a)
%b = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)
ret i64 %b
}

define i128 @test_v1i128(<1 x i128> %a) nounwind {
; CHECK-LABEL: test_v1i128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call i128 @llvm.experimental.vector.reduce.and.v1i128(<1 x i128> %a)
%b = call i128 @llvm.vector.reduce.and.v1i128(<1 x i128> %a)
ret i128 %b
}

@@ -89,7 +89,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-NEXT: and w8, w8, w2
; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
%b = call i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> %a)
%b = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
ret i8 %b
}

@@ -120,7 +120,7 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
; CHECK-NEXT: umov w9, v0.b[7]
; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
%b = call i8 @llvm.experimental.vector.reduce.and.v9i8(<9 x i8> %a)
%b = call i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a)
ret i8 %b
}

@@ -133,7 +133,7 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: and w0, w9, w8
; CHECK-NEXT: ret
%b = call i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> %a)
%b = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a)
ret i32 %b
}

@@ -150,7 +150,7 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
; CHECK-NEXT: and w8, w9, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%b = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
%b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
ret i1 %b
}

@@ -163,7 +163,7 @@ define i24 @test_v4i24(<4 x i24> %a) nounwind {
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: and w0, w9, w8
; CHECK-NEXT: ret
%b = call i24 @llvm.experimental.vector.reduce.and.v4i24(<4 x i24> %a)
%b = call i24 @llvm.vector.reduce.and.v4i24(<4 x i24> %a)
ret i24 %b
}

@@ -173,7 +173,7 @@ define i128 @test_v2i128(<2 x i128> %a) nounwind {
; CHECK-NEXT: and x0, x0, x2
; CHECK-NEXT: and x1, x1, x3
; CHECK-NEXT: ret
%b = call i128 @llvm.experimental.vector.reduce.and.v2i128(<2 x i128> %a)
%b = call i128 @llvm.vector.reduce.and.v2i128(<2 x i128> %a)
ret i128 %b
}

@@ -189,6 +189,6 @@ define i32 @test_v16i32(<16 x i32> %a) nounwind {
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: and w0, w9, w8
; CHECK-NEXT: ret
%b = call i32 @llvm.experimental.vector.reduce.and.v16i32(<16 x i32> %a)
%b = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a)
ret i32 %b
}
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AArch64/vecreduce-bool.ll
@@ -1,19 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a)
declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)

declare i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)
declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %a)
declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)

define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1:
@@ -24,7 +24,7 @@ define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, lt
; CHECK-NEXT: ret
%x = icmp slt <1 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %x)
%y = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -41,7 +41,7 @@ define i32 @reduce_and_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <2 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %x)
%y = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -58,7 +58,7 @@ define i32 @reduce_and_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <4 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %x)
%y = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -73,7 +73,7 @@ define i32 @reduce_and_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <8 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %x)
%y = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -88,7 +88,7 @@ define i32 @reduce_and_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <16 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %x)
%y = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -105,7 +105,7 @@ define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <32 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %x)
%y = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -119,7 +119,7 @@ define i32 @reduce_or_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, lt
; CHECK-NEXT: ret
%x = icmp slt <1 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %x)
%y = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -136,7 +136,7 @@ define i32 @reduce_or_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <2 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %x)
%y = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -153,7 +153,7 @@ define i32 @reduce_or_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <4 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %x)
%y = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -168,7 +168,7 @@ define i32 @reduce_or_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <8 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %x)
%y = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -183,7 +183,7 @@ define i32 @reduce_or_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <16 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %x)
%y = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
@@ -200,7 +200,7 @@ define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-NEXT: csel w0, w0, w1, ne
; CHECK-NEXT: ret
%x = icmp slt <32 x i8> %a0, zeroinitializer
%y = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %x)
%y = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %x)
%z = select i1 %y, i32 %a1, i32 %a2
ret i32 %z
}
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
@@ -3,14 +3,14 @@

; Same as vecreduce-fadd-legalization.ll, but without fmf.
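; (Editor's note, not part of the test: "without fmf" means the calls below
; carry no fast-math flags, so the fadd reduction must stay ordered, e.g.
;   %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a)
; whereas vecreduce-fadd-legalization.ll marks the equivalent calls "fast nnan",
; allowing the unordered lowering.)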

declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half, <1 x half>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float, <1 x float>)
declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128, <1 x fp128>)
declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)

declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float, <3 x float>)
declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>)
declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)

define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16:
@@ -20,7 +20,7 @@ define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
%b = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half 0.0, <1 x half> %a)
%b = call half @llvm.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a)
ret half %b
}

@@ -31,7 +31,7 @@ define float @test_v1f32(<1 x float> %a) nounwind {
; CHECK-NEXT: fmov s1, wzr
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
%b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float 0.0, <1 x float> %a)
%b = call float @llvm.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a)
ret float %b
}

@@ -41,7 +41,7 @@ define double @test_v1f64(<1 x double> %a) nounwind {
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
%b = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double 0.0, <1 x double> %a)
%b = call double @llvm.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a)
ret double %b
}

@@ -54,7 +54,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%b = call fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
ret fp128 %b
}

@@ -68,7 +68,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NEXT: mov s0, v0.s[2]
; CHECK-NEXT: fadd s0, s1, s0
; CHECK-NEXT: ret
%b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float 0.0, <3 x float> %a)
%b = call float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a)
ret float %b
}

@@ -86,7 +86,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%b = call fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
ret fp128 %b
}

@@ -123,6 +123,6 @@ define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-NEXT: mov s1, v3.s[3]
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
%b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a)
%b = call float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a)
ret float %b
}
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
@@ -1,20 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half, <1 x half>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float, <1 x float>)
declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128, <1 x fp128>)
declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)

declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float, <3 x float>)
declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>)
declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)

define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call fast nnan half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half 0.0, <1 x half> %a)
%b = call fast nnan half @llvm.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a)
ret half %b
}

@@ -24,23 +24,23 @@ define float @test_v1f32(<1 x float> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%b = call fast nnan float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float 0.0, <1 x float> %a)
%b = call fast nnan float @llvm.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a)
ret float %b
}

define double @test_v1f64(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call fast nnan double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double 0.0, <1 x double> %a)
%b = call fast nnan double @llvm.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a)
ret double %b
}

define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call fast nnan fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
%b = call fast nnan fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
ret fp128 %b
}

@@ -53,7 +53,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
%b = call fast nnan float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float 0.0, <3 x float> %a)
%b = call fast nnan float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a)
ret float %b
}

@@ -64,7 +64,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%b = call fast nnan fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
%b = call fast nnan fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
ret fp128 %b
}

@@ -78,6 +78,6 @@ define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
%b = call fast nnan float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a)
%b = call fast nnan float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a)
ret float %b
}
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -14,7 +14,7 @@ define float @add_HalfS(<2 x float> %bin.rdx) {
; CHECKNOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT: faddp s0, v0.2s
; CHECKNOFP16-NEXT: ret
%r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float 0.0, <2 x float> %bin.rdx)
%r = call fast float @llvm.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %bin.rdx)
ret float %r
}

@@ -48,7 +48,7 @@ define half @add_HalfH(<4 x half> %bin.rdx) {
; CHECKNOFP16-NEXT: fadd s0, s0, s1
; CHECKNOFP16-NEXT: fcvt h0, s0
; CHECKNOFP16-NEXT: ret
%r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half 0.0, <4 x half> %bin.rdx)
%r = call fast half @llvm.vector.reduce.fadd.f16.v4f16(half 0.0, <4 x half> %bin.rdx)
ret half %r
}

@@ -103,7 +103,7 @@ define half @add_H(<8 x half> %bin.rdx) {
; CHECKNOFP16-NEXT: fadd s0, s0, s1
; CHECKNOFP16-NEXT: fcvt h0, s0
; CHECKNOFP16-NEXT: ret
%r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half 0.0, <8 x half> %bin.rdx)
%r = call fast half @llvm.vector.reduce.fadd.f16.v8f16(half 0.0, <8 x half> %bin.rdx)
ret half %r
}

@@ -121,7 +121,7 @@ define float @add_S(<4 x float> %bin.rdx) {
; CHECKNOFP16-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECKNOFP16-NEXT: faddp s0, v0.2s
; CHECKNOFP16-NEXT: ret
%r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %bin.rdx)
%r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %bin.rdx)
ret float %r
}

@@ -135,7 +135,7 @@ define double @add_D(<2 x double> %bin.rdx) {
; CHECKNOFP16: // %bb.0:
; CHECKNOFP16-NEXT: faddp d0, v0.2d
; CHECKNOFP16-NEXT: ret
%r = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double 0.0, <2 x double> %bin.rdx)
%r = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %bin.rdx)
ret double %r
}

@@ -229,7 +229,7 @@ define half @add_2H(<16 x half> %bin.rdx) {
; CHECKNOFP16-NEXT: fadd s0, s1, s0
; CHECKNOFP16-NEXT: fcvt h0, s0
; CHECKNOFP16-NEXT: ret
%r = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half 0.0, <16 x half> %bin.rdx)
%r = call fast half @llvm.vector.reduce.fadd.f16.v16f16(half 0.0, <16 x half> %bin.rdx)
ret half %r
}

@@ -249,7 +249,7 @@ define float @add_2S(<8 x float> %bin.rdx) {
; CHECKNOFP16-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECKNOFP16-NEXT: faddp s0, v0.2s
; CHECKNOFP16-NEXT: ret
%r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float 0.0, <8 x float> %bin.rdx)
%r = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %bin.rdx)
ret float %r
}

@@ -265,16 +265,16 @@ define double @add_2D(<4 x double> %bin.rdx) {
; CHECKNOFP16-NEXT: fadd v0.2d, v0.2d, v1.2d
; CHECKNOFP16-NEXT: faddp d0, v0.2d
; CHECKNOFP16-NEXT: ret
%r = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double 0.0, <4 x double> %bin.rdx)
%r = call fast double @llvm.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %bin.rdx)
ret double %r
}

; Function Attrs: nounwind readnone
declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half, <4 x half>)
declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v8f16(half, <8 x half>)
declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v16f16(half, <16 x half>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float, <2 x float>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>)
declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>)
declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v4f64(double, <4 x double>)
declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
declare half @llvm.vector.reduce.fadd.f16.v8f16(half, <8 x half>)
declare half @llvm.vector.reduce.fadd.f16.v16f16(half, <16 x half>)
declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
@@ -1,20 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a)
declare float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a)
declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a)
declare fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a)
declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)

declare float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a)
declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)

define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a)
%b = call half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
ret half %b
}

@@ -24,38 +24,38 @@ define float @test_v1f32(<1 x float> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%b = call float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a)
%b = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
ret float %b
}

define double @test_v1f64(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a)
%b = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
ret double %b
}

define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
ret fp128 %b
}

; TODO: This doesn't work, because ExpandReductions only supports power of two
; unordered reductions.
;define float @test_v3f32(<3 x float> %a) nounwind {
; %b = call float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
; %b = call float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
; ret float %b
;}
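; (Editor's sketch, not part of the test: a power-of-two unordered reduction
; does legalize, which is what test_v16f32 at the end of this file exercises:
;   %b = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
; only the non-power-of-two <3 x float> case above is unsupported.)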

define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: // %bb.0:
; CHECK-NEXT: b fmaxl
%b = call fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
ret fp128 %b
}

@@ -67,6 +67,6 @@ define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
; CHECK-NEXT: fmaxnmv s0, v0.4s
; CHECK-NEXT: ret
%b = call float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a)
%b = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
ret float %b
}
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -1,20 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a)
declare float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a)
declare double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a)
declare fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a)
declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)

declare float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
declare float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a)
declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)

define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call nnan half @llvm.experimental.vector.reduce.fmax.v1f16(<1 x half> %a)
%b = call nnan half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
ret half %b
}

@@ -24,23 +24,23 @@ define float @test_v1f32(<1 x float> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%b = call nnan float @llvm.experimental.vector.reduce.fmax.v1f32(<1 x float> %a)
%b = call nnan float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
ret float %b
}

define double @test_v1f64(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call nnan double @llvm.experimental.vector.reduce.fmax.v1f64(<1 x double> %a)
%b = call nnan double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
ret double %b
}

define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.v1f128(<1 x fp128> %a)
%b = call nnan fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
ret fp128 %b
}

@@ -52,7 +52,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fmaxnmv s0, v0.4s
; CHECK-NEXT: ret
%b = call nnan float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
%b = call nnan float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
ret float %b
}

@@ -64,15 +64,15 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fmaxnmv s0, v0.4s
; CHECK-NEXT: ret
%b = call nnan ninf float @llvm.experimental.vector.reduce.fmax.v3f32(<3 x float> %a)
%b = call nnan ninf float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
ret float %b
}

define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: // %bb.0:
; CHECK-NEXT: b fmaxl
%b = call nnan fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
%b = call nnan fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
ret fp128 %b
}

@@ -84,6 +84,6 @@ define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
; CHECK-NEXT: fmaxnmv s0, v0.4s
; CHECK-NEXT: ret
%b = call nnan float @llvm.experimental.vector.reduce.fmax.v16f32(<16 x float> %a)
%b = call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
ret float %b
}
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -1,20 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.experimental.vector.reduce.fmin.v1f16(<1 x half> %a)
declare float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> %a)
declare double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %a)
declare fp128 @llvm.experimental.vector.reduce.fmin.v1f128(<1 x fp128> %a)
declare half @llvm.vector.reduce.fmin.v1f16(<1 x half> %a)
declare float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a)
declare double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
declare fp128 @llvm.vector.reduce.fmin.v1f128(<1 x fp128> %a)

declare float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a)
declare fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a)
declare float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a)
declare float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a)
declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
declare float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)

define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call nnan half @llvm.experimental.vector.reduce.fmin.v1f16(<1 x half> %a)
%b = call nnan half @llvm.vector.reduce.fmin.v1f16(<1 x half> %a)
ret half %b
}

@@ -24,23 +24,23 @@ define float @test_v1f32(<1 x float> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%b = call nnan float @llvm.experimental.vector.reduce.fmin.v1f32(<1 x float> %a)
%b = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a)
ret float %b
}

define double @test_v1f64(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call nnan double @llvm.experimental.vector.reduce.fmin.v1f64(<1 x double> %a)
%b = call nnan double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
ret double %b
}

define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call nnan fp128 @llvm.experimental.vector.reduce.fmin.v1f128(<1 x fp128> %a)
%b = call nnan fp128 @llvm.vector.reduce.fmin.v1f128(<1 x fp128> %a)
ret fp128 %b
}

@@ -52,7 +52,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fminnmv s0, v0.4s
; CHECK-NEXT: ret
%b = call nnan float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a)
%b = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a)
ret float %b
}

@@ -64,15 +64,15 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: fminnmv s0, v0.4s
; CHECK-NEXT: ret
%b = call nnan ninf float @llvm.experimental.vector.reduce.fmin.v3f32(<3 x float> %a)
%b = call nnan ninf float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a)
ret float %b
}

define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: // %bb.0:
; CHECK-NEXT: b fminl
%b = call nnan fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a)
%b = call nnan fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
ret fp128 %b
}

@@ -84,6 +84,6 @@ define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s
; CHECK-NEXT: fminnmv s0, v0.4s
; CHECK-NEXT: ret
%b = call nnan float @llvm.experimental.vector.reduce.fmin.v16f32(<16 x float> %a)
%b = call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
ret float %b
}
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
@@ -3,14 +3,14 @@

; Same as vecreduce-fmul-legalization.ll, but without fmf.

declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half, <1 x half>)
declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float, <1 x float>)
declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128, <1 x fp128>)
declare half @llvm.vector.reduce.fmul.f16.v1f16(half, <1 x half>)
declare float @llvm.vector.reduce.fmul.f32.v1f32(float, <1 x float>)
declare double @llvm.vector.reduce.fmul.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128, <1 x fp128>)

declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float, <3 x float>)
declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float, <16 x float>)
declare float @llvm.vector.reduce.fmul.f32.v3f32(float, <3 x float>)
declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.vector.reduce.fmul.f32.v16f32(float, <16 x float>)

define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16:
@@ -20,7 +20,7 @@ define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
%b = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half 0.0, <1 x half> %a)
%b = call half @llvm.vector.reduce.fmul.f16.v1f16(half 0.0, <1 x half> %a)
ret half %b
}

@@ -31,7 +31,7 @@ define float @test_v1f32(<1 x float> %a) nounwind {
; CHECK-NEXT: fmov s1, wzr
; CHECK-NEXT: fmul s0, s1, v0.s[0]
; CHECK-NEXT: ret
%b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float 0.0, <1 x float> %a)
%b = call float @llvm.vector.reduce.fmul.f32.v1f32(float 0.0, <1 x float> %a)
ret float %b
}

@@ -41,7 +41,7 @@ define double @test_v1f64(<1 x double> %a) nounwind {
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: ret
%b = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double 0.0, <1 x double> %a)
%b = call double @llvm.vector.reduce.fmul.f64.v1f64(double 0.0, <1 x double> %a)
ret double %b
}

@@ -54,7 +54,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
; CHECK-NEXT: bl __multf3
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
ret fp128 %b
}

@@ -66,7 +66,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NEXT: fmul s1, s1, v0.s[1]
; CHECK-NEXT: fmul s0, s1, v0.s[2]
; CHECK-NEXT: ret
%b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float 0.0, <3 x float> %a)
%b = call float @llvm.vector.reduce.fmul.f32.v3f32(float 0.0, <3 x float> %a)
ret float %b
}

@@ -84,7 +84,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
ret fp128 %b
}

@@ -109,6 +109,6 @@ define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-NEXT: fmul s0, s0, v3.s[2]
; CHECK-NEXT: fmul s0, s0, v3.s[3]
; CHECK-NEXT: ret
%b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 0.0, <16 x float> %a)
%b = call float @llvm.vector.reduce.fmul.f32.v16f32(float 0.0, <16 x float> %a)
ret float %b
}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/vecreduce-propagate-sd-flags.ll
@@ -24,8 +24,8 @@ entry:
%1 = insertelement <4 x double> %0, double 1.0, i32 1
%2 = insertelement <4 x double> %1, double 1.0, i32 2
%3 = insertelement <4 x double> %2, double 1.0, i32 3
%4 = call nnan reassoc double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double> %3)
%4 = call nnan reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> %3)
ret double %4
}

declare double @llvm.experimental.vector.reduce.fmax.v4f64(<4 x double>)
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
62 changes: 31 additions & 31 deletions llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -1,29 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

declare i1 @llvm.experimental.vector.reduce.umax.v1i1(<1 x i1> %a)
declare i8 @llvm.experimental.vector.reduce.umax.v1i8(<1 x i8> %a)
declare i16 @llvm.experimental.vector.reduce.umax.v1i16(<1 x i16> %a)
declare i24 @llvm.experimental.vector.reduce.umax.v1i24(<1 x i24> %a)
declare i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a)
declare i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a)
declare i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a)

declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a)
declare i8 @llvm.experimental.vector.reduce.umax.v3i8(<3 x i8> %a)
declare i8 @llvm.experimental.vector.reduce.umax.v9i8(<9 x i8> %a)
declare i32 @llvm.experimental.vector.reduce.umax.v3i32(<3 x i32> %a)
declare i1 @llvm.experimental.vector.reduce.umax.v4i1(<4 x i1> %a)
declare i24 @llvm.experimental.vector.reduce.umax.v4i24(<4 x i24> %a)
declare i128 @llvm.experimental.vector.reduce.umax.v2i128(<2 x i128> %a)
declare i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a)
declare i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %a)
declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a)
declare i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> %a)
declare i24 @llvm.vector.reduce.umax.v1i24(<1 x i24> %a)
declare i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a)
declare i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a)
declare i128 @llvm.vector.reduce.umax.v1i128(<1 x i128> %a)

declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a)
declare i8 @llvm.vector.reduce.umax.v9i8(<9 x i8> %a)
declare i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
declare i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a)
declare i24 @llvm.vector.reduce.umax.v4i24(<4 x i24> %a)
declare i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a)
declare i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %a)

define i1 @test_v1i1(<1 x i1> %a) nounwind {
; CHECK-LABEL: test_v1i1:
; CHECK: // %bb.0:
; CHECK-NEXT: and w0, w0, #0x1
; CHECK-NEXT: ret
%b = call i1 @llvm.experimental.vector.reduce.umax.v1i1(<1 x i1> %a)
%b = call i1 @llvm.vector.reduce.umax.v1i1(<1 x i1> %a)
ret i1 %b
}

@@ -33,7 +33,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret
%b = call i8 @llvm.experimental.vector.reduce.umax.v1i8(<1 x i8> %a)
%b = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a)
ret i8 %b
}

@@ -43,15 +43,15 @@ define i16 @test_v1i16(<1 x i16> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret
%b = call i16 @llvm.experimental.vector.reduce.umax.v1i16(<1 x i16> %a)
%b = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> %a)
ret i16 %b
}

define i24 @test_v1i24(<1 x i24> %a) nounwind {
; CHECK-LABEL: test_v1i24:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call i24 @llvm.experimental.vector.reduce.umax.v1i24(<1 x i24> %a)
%b = call i24 @llvm.vector.reduce.umax.v1i24(<1 x i24> %a)
ret i24 %b
}

@@ -61,7 +61,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i32 @llvm.experimental.vector.reduce.umax.v1i32(<1 x i32> %a)
%b = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a)
ret i32 %b
}

@@ -71,15 +71,15 @@ define i64 @test_v1i64(<1 x i64> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%b = call i64 @llvm.experimental.vector.reduce.umax.v1i64(<1 x i64> %a)
%b = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a)
ret i64 %b
}

define i128 @test_v1i128(<1 x i128> %a) nounwind {
; CHECK-LABEL: test_v1i128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call i128 @llvm.experimental.vector.reduce.umax.v1i128(<1 x i128> %a)
%b = call i128 @llvm.vector.reduce.umax.v1i128(<1 x i128> %a)
ret i128 %b
}

@@ -92,7 +92,7 @@ define i64 @test_v2i64(<2 x i64> %a) nounwind {
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: csel x0, x9, x8, hi
; CHECK-NEXT: ret
%b = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %a)
%b = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
ret i64 %b
}

@@ -107,7 +107,7 @@ define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-NEXT: umaxv h0, v0.4h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i8 @llvm.experimental.vector.reduce.umax.v3i8(<3 x i8> %a)
%b = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> %a)
ret i8 %b
}

@@ -124,7 +124,7 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i8 @llvm.experimental.vector.reduce.umax.v9i8(<9 x i8> %a)
%b = call i8 @llvm.vector.reduce.umax.v9i8(<9 x i8> %a)
ret i8 %b
}

@@ -135,7 +135,7 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i32 @llvm.experimental.vector.reduce.umax.v3i32(<3 x i32> %a)
%b = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
ret i32 %b
}

@@ -148,7 +148,7 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%b = call i1 @llvm.experimental.vector.reduce.umax.v4i1(<4 x i1> %a)
%b = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a)
ret i1 %b
}

@@ -159,7 +159,7 @@ define i24 @test_v4i24(<4 x i24> %a) nounwind {
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i24 @llvm.experimental.vector.reduce.umax.v4i24(<4 x i24> %a)
%b = call i24 @llvm.vector.reduce.umax.v4i24(<4 x i24> %a)
ret i24 %b
}

@@ -173,7 +173,7 @@ define i128 @test_v2i128(<2 x i128> %a) nounwind {
; CHECK-NEXT: csel x0, x8, x9, eq
; CHECK-NEXT: csel x1, x1, x3, hi
; CHECK-NEXT: ret
%b = call i128 @llvm.experimental.vector.reduce.umax.v2i128(<2 x i128> %a)
%b = call i128 @llvm.vector.reduce.umax.v2i128(<2 x i128> %a)
ret i128 %b
}

@@ -186,6 +186,6 @@ define i32 @test_v16i32(<16 x i32> %a) nounwind {
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%b = call i32 @llvm.experimental.vector.reduce.umax.v16i32(<16 x i32> %a)
%b = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> %a)
ret i32 %b
}
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half, <4 x half>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>)
declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)

define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-LABEL: test_v4f16:
@@ -37,7 +37,7 @@ define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half 0.0, <4 x half> %a)
%b = call fast half @llvm.vector.reduce.fadd.f16.v4f16(half 0.0, <4 x half> %a)
ret half %b
}

@@ -55,7 +55,7 @@ define float @test_v4f32(<4 x float> %a) nounwind {
; CHECK-NEXT: bl __aeabi_fadd
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a)
%b = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a)
ret float %b
}

@@ -67,7 +67,7 @@ define double @test_v2f64(<2 x double> %a) nounwind {
; CHECK-NEXT: bl __aeabi_dadd
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double zeroinitializer, <2 x double> %a)
%b = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double zeroinitializer, <2 x double> %a)
ret double %b
}

@@ -90,6 +90,6 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
%b = call fast fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
ret fp128 %b
}
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll
@@ -1,14 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half, <1 x half>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float, <1 x float>)
declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128, <1 x fp128>)
declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)

declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float, <3 x float>)
declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float, <16 x float>)
declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)

define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16:
@@ -28,7 +28,7 @@ define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI0_0:
; CHECK-NEXT: .long 0x00000000 @ float 0
%b = call half @llvm.experimental.vector.reduce.v2.fadd.f16.v1f16(half 0.0, <1 x half> %a)
%b = call half @llvm.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a)
ret half %b
}

@@ -44,7 +44,7 @@ define float @test_v1f32(<1 x float> %a) nounwind {
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI1_0:
; CHECK-NEXT: .long 0x00000000 @ float 0
%b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v1f32(float 0.0, <1 x float> %a)
%b = call float @llvm.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a)
ret float %b
}

@@ -56,7 +56,7 @@ define double @test_v1f64(<1 x double> %a) nounwind {
; CHECK-NEXT: vadd.f64 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%b = call double @llvm.experimental.vector.reduce.v2.fadd.f64.v1f64(double 0.0, <1 x double> %a)
%b = call double @llvm.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a)
ret double %b
}

@@ -76,7 +76,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
ret fp128 %b
}

@@ -95,7 +95,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI4_0:
; CHECK-NEXT: .long 0x00000000 @ float 0
%b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v3f32(float 0.0, <3 x float> %a)
%b = call float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a)
ret float %b
}

@@ -124,7 +124,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
ret fp128 %b
}

@@ -162,6 +162,6 @@ define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI6_0:
; CHECK-NEXT: .long 0x00000000 @ float 0
%b = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v16f32(float 0.0, <16 x float> %a)
%b = call float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a)
ret float %b
}
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half>)
declare float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float>)
declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
declare fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128>)
declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128>)

define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-LABEL: test_v4f16:
@@ -37,7 +37,7 @@ define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast half @llvm.experimental.vector.reduce.fmax.v4f16(<4 x half> %a)
%b = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
ret half %b
}

@@ -55,7 +55,7 @@ define float @test_v4f32(<4 x float> %a) nounwind {
; CHECK-NEXT: bl fmaxf
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast float @llvm.experimental.vector.reduce.fmax.v4f32(<4 x float> %a)
%b = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
ret float %b
}

@@ -67,7 +67,7 @@ define double @test_v2f64(<2 x double> %a) nounwind {
; CHECK-NEXT: bl fmax
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %a)
%b = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
ret double %b
}

@@ -90,6 +90,6 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast fp128 @llvm.experimental.vector.reduce.fmax.v2f128(<2 x fp128> %a)
%b = call fast fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
ret fp128 %b
}
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half>)
declare float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float>)
declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
declare fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128>)
declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128>)

define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-LABEL: test_v4f16:
@@ -37,7 +37,7 @@ define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast half @llvm.experimental.vector.reduce.fmin.v4f16(<4 x half> %a)
%b = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
ret half %b
}

@@ -55,7 +55,7 @@ define float @test_v4f32(<4 x float> %a) nounwind {
; CHECK-NEXT: bl fminf
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast float @llvm.experimental.vector.reduce.fmin.v4f32(<4 x float> %a)
%b = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
ret float %b
}

@@ -67,7 +67,7 @@ define double @test_v2f64(<2 x double> %a) nounwind {
; CHECK-NEXT: bl fmin
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %a)
%b = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
ret double %b
}

@@ -90,6 +90,6 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast fp128 @llvm.experimental.vector.reduce.fmin.v2f128(<2 x fp128> %a)
%b = call fast fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
ret fp128 %b
}
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half, <4 x half>)
declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double, <2 x double>)
declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128, <2 x fp128>)
declare half @llvm.vector.reduce.fmul.f16.v4f16(half, <4 x half>)
declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>)

define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-LABEL: test_v4f16:
@@ -37,7 +37,7 @@ define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-NEXT: bl __aeabi_f2h
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast half @llvm.experimental.vector.reduce.v2.fmul.f16.v4f16(half 1.0, <4 x half> %a)
%b = call fast half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %a)
ret half %b
}

@@ -55,7 +55,7 @@ define float @test_v4f32(<4 x float> %a) nounwind {
; CHECK-NEXT: bl __aeabi_fmul
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %a)
%b = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
ret float %b
}

@@ -67,7 +67,7 @@ define double @test_v2f64(<2 x double> %a) nounwind {
; CHECK-NEXT: bl __aeabi_dmul
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast double @llvm.experimental.vector.reduce.v2.fmul.f64.v2f64(double 1.0, <2 x double> %a)
%b = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a)
ret double %b
}

@@ -90,6 +90,6 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fast fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a)
%b = call fast fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a)
ret fp128 %b
}
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll
@@ -1,14 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half, <1 x half>)
declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float, <1 x float>)
declare double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128, <1 x fp128>)
declare half @llvm.vector.reduce.fmul.f16.v1f16(half, <1 x half>)
declare float @llvm.vector.reduce.fmul.f32.v1f32(float, <1 x float>)
declare double @llvm.vector.reduce.fmul.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128, <1 x fp128>)

declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float, <3 x float>)
declare fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float, <16 x float>)
declare float @llvm.vector.reduce.fmul.f32.v3f32(float, <3 x float>)
declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.vector.reduce.fmul.f32.v16f32(float, <16 x float>)

define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16:
@@ -28,7 +28,7 @@ define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI0_0:
; CHECK-NEXT: .long 0x00000000 @ float 0
%b = call half @llvm.experimental.vector.reduce.v2.fmul.f16.v1f16(half 0.0, <1 x half> %a)
%b = call half @llvm.vector.reduce.fmul.f16.v1f16(half 0.0, <1 x half> %a)
ret half %b
}

@@ -44,7 +44,7 @@ define float @test_v1f32(<1 x float> %a) nounwind {
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI1_0:
; CHECK-NEXT: .long 0x00000000 @ float 0
%b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v1f32(float 0.0, <1 x float> %a)
%b = call float @llvm.vector.reduce.fmul.f32.v1f32(float 0.0, <1 x float> %a)
ret float %b
}

@@ -56,7 +56,7 @@ define double @test_v1f64(<1 x double> %a) nounwind {
; CHECK-NEXT: vmul.f64 d16, d17, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%b = call double @llvm.experimental.vector.reduce.v2.fmul.f64.v1f64(double 0.0, <1 x double> %a)
%b = call double @llvm.vector.reduce.fmul.f64.v1f64(double 0.0, <1 x double> %a)
ret double %b
}

@@ -76,7 +76,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
ret fp128 %b
}

@@ -95,7 +95,7 @@ define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI4_0:
; CHECK-NEXT: .long 0x00000000 @ float 0
%b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v3f32(float 0.0, <3 x float> %a)
%b = call float @llvm.vector.reduce.fmul.f32.v3f32(float 0.0, <3 x float> %a)
ret float %b
}

@@ -124,7 +124,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: mov pc, lr
%b = call fp128 @llvm.experimental.vector.reduce.v2.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
ret fp128 %b
}

@@ -162,6 +162,6 @@ define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI6_0:
; CHECK-NEXT: .long 0x00000000 @ float 0
%b = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v16f32(float 0.0, <16 x float> %a)
%b = call float @llvm.vector.reduce.fmul.f32.v16f32(float 0.0, <16 x float> %a)
ret float %b
}
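Worth noting across the fadd/fmul files above: for these two reductions the rename also drops the ".v2" name component (llvm.experimental.vector.reduce.v2.fmul.* becomes llvm.vector.reduce.fmul.*), while the scalar start operand keeps its meaning. A short sketch of the two calling modes, assuming the documented semantics of the intrinsic:

; ordered (strict) form: evaluated as a sequential chain seeded with %start
%ord = call float @llvm.vector.reduce.fmul.f32.v4f32(float %start, <4 x float> %v)
; with fast-math flags the reduction may be reassociated into a tree
%fast = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %start, <4 x float> %v)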
74 changes: 37 additions & 37 deletions llvm/test/CodeGen/Generic/expand-experimental-reductions.ll
@@ -1,24 +1,24 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -expand-reductions -S | FileCheck %s
; Tests without a target which should expand all reductions
declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)

declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)

declare i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)

declare double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)

declare i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8>)

define i64 @add_i64(<2 x i64> %vec) {
; CHECK-LABEL: @add_i64(
@@ -29,7 +29,7 @@ define i64 @add_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> %vec)
%r = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %vec)
ret i64 %r
}

@@ -42,7 +42,7 @@ define i64 @mul_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.mul.v2i64(<2 x i64> %vec)
%r = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %vec)
ret i64 %r
}

@@ -55,7 +55,7 @@ define i64 @and_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.and.v2i64(<2 x i64> %vec)
%r = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %vec)
ret i64 %r
}

@@ -68,7 +68,7 @@ define i64 @or_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %vec)
%r = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %vec)
ret i64 %r
}

@@ -81,7 +81,7 @@ define i64 @xor_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.xor.v2i64(<2 x i64> %vec)
%r = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %vec)
ret i64 %r
}

@@ -97,7 +97,7 @@ define float @fadd_f32(<4 x float> %vec) {
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
%r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %vec)
ret float %r
}

@@ -113,7 +113,7 @@ define float @fadd_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %accum, <4 x float> %vec)
%r = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
ret float %r
}

@@ -131,7 +131,7 @@ define float @fadd_f32_strict(<4 x float> %vec) {
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float undef, <4 x float> %vec)
%r = call float @llvm.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
ret float %r
}

@@ -149,7 +149,7 @@ define float @fadd_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %accum, <4 x float> %vec)
%r = call float @llvm.vector.reduce.fadd.f32.v4f32(float %accum, <4 x float> %vec)
ret float %r
}

@@ -165,7 +165,7 @@ define float @fmul_f32(<4 x float> %vec) {
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
%r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %vec)
ret float %r
}

@@ -181,7 +181,7 @@ define float @fmul_f32_accum(float %accum, <4 x float> %vec) {
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %accum, <4 x float> %vec)
%r = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
ret float %r
}

@@ -199,7 +199,7 @@ define float @fmul_f32_strict(<4 x float> %vec) {
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float undef, <4 x float> %vec)
%r = call float @llvm.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
ret float %r
}

@@ -217,7 +217,7 @@ define float @fmul_f32_strict_accum(float %accum, <4 x float> %vec) {
; CHECK-NEXT: ret float [[BIN_RDX3]]
;
entry:
%r = call float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float %accum, <4 x float> %vec)
%r = call float @llvm.vector.reduce.fmul.f32.v4f32(float %accum, <4 x float> %vec)
ret float %r
}

@@ -231,7 +231,7 @@ define i64 @smax_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.smax.v2i64(<2 x i64> %vec)
%r = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec)
ret i64 %r
}

@@ -245,7 +245,7 @@ define i64 @smin_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.smin.v2i64(<2 x i64> %vec)
%r = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec)
ret i64 %r
}

@@ -259,7 +259,7 @@ define i64 @umax_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.umax.v2i64(<2 x i64> %vec)
%r = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec)
ret i64 %r
}

@@ -273,7 +273,7 @@ define i64 @umin_i64(<2 x i64> %vec) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%r = call i64 @llvm.experimental.vector.reduce.umin.v2i64(<2 x i64> %vec)
%r = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec)
ret i64 %r
}

@@ -282,11 +282,11 @@ entry:
define double @fmax_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmax_f64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[R:%.*]] = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT: [[R:%.*]] = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT: ret double [[R]]
;
entry:
%r = call double @llvm.experimental.vector.reduce.fmax.v2f64(<2 x double> %vec)
%r = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %vec)
ret double %r
}

@@ -295,11 +295,11 @@ entry:
define double @fmin_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmin_f64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[R:%.*]] = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT: [[R:%.*]] = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> [[VEC:%.*]])
; CHECK-NEXT: ret double [[R]]
;
entry:
%r = call double @llvm.experimental.vector.reduce.fmin.v2f64(<2 x double> %vec)
%r = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %vec)
ret double %r
}

@@ -309,10 +309,10 @@ entry:
define i8 @test_v3i8(<3 x i8> %a) nounwind {
; CHECK-LABEL: @test_v3i8(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B:%.*]] = call i8 @llvm.experimental.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]])
; CHECK-NEXT: [[B:%.*]] = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> [[A:%.*]])
; CHECK-NEXT: ret i8 [[B]]
;
entry:
%b = call i8 @llvm.experimental.vector.reduce.and.i8.v3i8(<3 x i8> %a)
%b = call i8 @llvm.vector.reduce.and.i8.v3i8(<3 x i8> %a)
ret i8 %b
}
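The BIN_RDX values in the CHECK lines above come from the -expand-reductions fallback, which lowers a reduction to a log2 shuffle tree when no target handles the intrinsic. As a sketch (not copied from the test), the fast v4f32 fadd case expands to roughly:

%rdx.shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%bin.rdx = fadd fast <4 x float> %vec, %rdx.shuf
%rdx.shuf1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%bin.rdx2 = fadd fast <4 x float> %bin.rdx, %rdx.shuf1
%res = extractelement <4 x float> %bin.rdx2, i32 0

The fmax/fmin tests above are the exception: as their CHECK lines show, the pass leaves the (renamed) intrinsic call in place.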
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/add_reduce.mir
@@ -44,7 +44,7 @@
%add7 = add <4 x i32> %mul, %splat.output
%max = tail call <4 x i32> @llvm.arm.mve.max.predicated.v4i32.v4i1(<4 x i32> %add7, <4 x i32> %.splat.i42, i32 1, <4 x i1> %pred, <4 x i32> undef)
%min = tail call <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32> %max, <4 x i32> %.splat.i, i32 1, <4 x i1> %pred, <4 x i32> undef)
%reduce = tail call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %min)
%reduce = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %min)
store i32 %reduce, i32* %scevgep2
%add.ptr = getelementptr inbounds i8, i8* %input_1_vect.addr.052, i32 4
%add.ptr14 = getelementptr inbounds i8, i8* %input_2_vect.addr.051, i32 4
@@ -62,7 +62,7 @@
declare <4 x i32> @llvm.arm.mve.min.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <4 x i32>) #1
declare i1 @llvm.test.set.loop.iterations.i32(i32) #4
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #4
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #5
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #5

...
---
Original file line number Diff line number Diff line change
@@ -85,7 +85,7 @@ vector.body: ; preds = %vector.body, %vecto

middle.block: ; preds = %vector.body
%tmp8 = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
%tmp9 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp8)
%tmp9 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp8)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -188,7 +188,7 @@ vector.body: ; preds = %vector.body, %vecto

middle.block: ; preds = %vector.body
%acc = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
%reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %acc)
%reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %acc)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -287,7 +287,7 @@ vector.body: ; preds = %vector.body, %vecto

middle.block: ; preds = %vector.body
%acc = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
%reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %acc)
%reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %acc)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -386,7 +386,7 @@ vector.body: ; preds = %vector.body, %vecto

middle.block: ; preds = %vector.body
%acc = select <4 x i1> %tmp1, <4 x i32> %add, <4 x i32> %vec.phi
%reduce = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %acc)
%reduce = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %acc)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -528,6 +528,6 @@ declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)

; Function Attrs: nounwind readnone willreturn
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-1.mir
@@ -56,15 +56,15 @@
br i1 %tmp16, label %vector.body, label %middle.block

middle.block: ; preds = %vector.body
%tmp17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp14)
%tmp17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp14)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
%res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp17, %middle.block ]
ret i32 %res.0.lcssa
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/inloop-vpsel-2.mir
@@ -58,15 +58,15 @@
br i1 %tmp16, label %vector.body, label %middle.block

middle.block: ; preds = %vector.body
%tmp17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp14)
%tmp17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp14)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
%res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp17, %middle.block ]
ret i32 %res.0.lcssa
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/invariant-qreg.mir
@@ -68,7 +68,7 @@
%wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
%tmp12 = mul nsw <4 x i32> %pass, %tmp10
%tmp13 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp12)
%tmp13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp12)
%scevgep = getelementptr i16, i16* %lsr.iv, i32 4
%tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp16 = icmp ne i32 %tmp15, 0
@@ -105,7 +105,7 @@
%wide.masked.load = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %lsr.iv17, i32 2, <4 x i1> %tmp8, <4 x i16> undef)
%tmp10 = sext <4 x i16> %wide.masked.load to <4 x i32>
%tmp12 = add nsw <4 x i32> %pass, %tmp10
%tmp13 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp12)
%tmp13 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp12)
%scevgep = getelementptr i16, i16* %lsr.iv, i32 4
%tmp15 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv1, i32 1)
%tmp16 = icmp ne i32 %tmp15, 0
@@ -117,7 +117,7 @@
ret i32 %res
}

declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare void @llvm.set.loop.iterations.i32(i32)
Original file line number Diff line number Diff line change
@@ -40,7 +40,7 @@
br i1 %15, label %vector.body, label %middle.block

middle.block: ; preds = %vector.body
%16 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
%16 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -88,7 +88,7 @@
br i1 %15, label %vector.body, label %middle.block

middle.block: ; preds = %vector.body
%16 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
%16 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float 0.000000e+00, <4 x float> %13)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -98,7 +98,7 @@

declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/matrix.mir
@@ -91,7 +91,7 @@
%22 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %10)
%23 = bitcast i16* %lsr.iv7 to i1*
%24 = select <4 x i1> %22, <4 x i32> %.lcssa, <4 x i32> %vec.phi.lcssa
%25 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %24)
%25 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %24)
%sunkaddr = mul i32 %i.064.us, 4
%26 = bitcast i32* %e to i8*
%sunkaddr17 = getelementptr inbounds i8, i8* %26, i32 %sunkaddr
@@ -141,7 +141,7 @@
}
declare dso_local arm_aapcs_vfpcc signext i16 @crc16(...) local_unnamed_addr #0
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
@@ -69,7 +69,7 @@ vector.body: ; preds = %vector.body, %vecto

middle.block: ; preds = %vector.body
%7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
%8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
%8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -145,7 +145,7 @@ vector.body: ; preds = %vector.body, %vecto

middle.block: ; preds = %vector.body
%7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
%8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
%8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -221,7 +221,7 @@ vector.body: ; preds = %vector.body, %vecto

middle.block: ; preds = %vector.body
%7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
%8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
%8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -297,7 +297,7 @@ vector.body: ; preds = %vector.body, %vecto

middle.block: ; preds = %vector.body
%7 = select <4 x i1> %1, <4 x i32> %5, <4 x i32> %vec.phi
%8 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %7)
%8 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %7)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -371,7 +371,7 @@ vector.body: ; preds = %vector.body, %vecto

middle.block: ; preds = %vector.body
%6 = select <4 x i1> %1, <4 x i32> %4, <4 x i32> %vec.phi
%7 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %6)
%7 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %6)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -1273,6 +1273,6 @@ declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
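A shape that repeats through these low-overhead-loop tests: the vector body accumulates into a phi, and the middle block masks out lanes past the trip count before the scalar reduction. Schematically, simplified from the functions above:

middle.block:
  ; keep the updated accumulator only in the lanes the final iteration executed
  %legal = select <4 x i1> %active, <4 x i32> %acc.next, <4 x i32> %acc.prev
  %sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %legal)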
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
@@ -51,7 +51,7 @@ define void @mat_vec_sext_i16(i16** nocapture readonly %A, i16* nocapture readon
; CHECK-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP14]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP17]])
; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP17]])
; CHECK-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8_US]], align 4
; CHECK-NEXT: [[INC10_US]] = add nuw i32 [[I_025_US]], 1
; CHECK-NEXT: [[EXITCOND27:%.*]] = icmp eq i32 [[INC10_US]], [[N]]
@@ -112,7 +112,7 @@ vector.body: ; preds = %vector.body, %for.c

middle.block: ; preds = %vector.body
%tmp17 = select <4 x i1> %tmp7, <4 x i32> %tmp14, <4 x i32> %vec.phi
%tmp18 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp17)
%tmp18 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp17)
store i32 %tmp18, i32* %arrayidx8.us, align 4
%inc10.us = add nuw i32 %i.025.us, 1
%exitcond27 = icmp eq i32 %inc10.us, %N
@@ -170,7 +170,7 @@ define void @mat_vec_i32(i32** nocapture readonly %A, i32* nocapture readonly %B
; CHECK-NEXT: br i1 [[TMP14]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP12]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP15]])
; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP15]])
; CHECK-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX7_US]], align 4
; CHECK-NEXT: [[INC9_US]] = add nuw i32 [[I_024_US]], 1
; CHECK-NEXT: [[EXITCOND26:%.*]] = icmp eq i32 [[INC9_US]], [[N]]
@@ -229,7 +229,7 @@ vector.body: ; preds = %vector.body, %for.c

middle.block: ; preds = %vector.body
%tmp15 = select <4 x i1> %tmp7, <4 x i32> %tmp12, <4 x i32> %vec.phi
%tmp16 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp15)
%tmp16 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp15)
store i32 %tmp16, i32* %arrayidx7.us, align 4
%inc9.us = add nuw i32 %i.024.us, 1
%exitcond26 = icmp eq i32 %inc9.us, %N
@@ -247,7 +247,7 @@ declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #0

; Function Attrs: nounwind readnone willreturn
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #1
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #1

; Function Attrs: noduplicate nounwind
declare void @llvm.set.loop.iterations.i32(i32) #2
Original file line number Diff line number Diff line change
@@ -40,15 +40,15 @@
br i1 %tmp15, label %vector.body, label %middle.block

middle.block: ; preds = %vector.body
%tmp16 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp13)
%tmp16 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %tmp13)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
%res.0.lcssa = phi i32 [ 0, %entry ], [ %tmp16, %middle.block ]
ret i32 %res.0.lcssa
}
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>) #1
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) #2
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <4 x i1> @llvm.arm.mve.vctp32(i32) #4
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/non-masked-load.mir
@@ -44,7 +44,7 @@
%.lcssa = phi <16 x i8> [ %13, %vector.body ]
%16 = call <16 x i1> @llvm.arm.mve.vctp8(i32 %7)
%17 = select <16 x i1> %16, <16 x i8> %.lcssa, <16 x i8> %vec.phi.lcssa
%18 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %17)
%18 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %17)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -53,7 +53,7 @@
}

declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>) #1
declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>) #2
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #2
declare void @llvm.set.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare <16 x i1> @llvm.arm.mve.vctp8(i32) #4
Original file line number Diff line number Diff line change
@@ -36,7 +36,7 @@
br i1 %cmp, label %for.body, label %middle.block

middle.block: ; preds = %for.body
%reduce = tail call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> %acc.next)
%reduce = tail call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %acc.next)
ret i16 %reduce

for.cond.cleanup: ; preds = %entry
@@ -47,7 +47,7 @@
declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>) #2
declare i1 @llvm.test.set.loop.iterations.i32(i32) #3
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #3
declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>) #4
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #4
declare <8 x i16> @llvm.arm.mve.add.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, <8 x i1>, <8 x i16>) #1

...
Original file line number Diff line number Diff line change
@@ -41,7 +41,7 @@
br i1 %16, label %vector.body, label %middle.block

middle.block: ; preds = %vector.body
%17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
%17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -88,7 +88,7 @@
br i1 %16, label %vector.body, label %middle.block

middle.block: ; preds = %vector.body
%17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
%17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -135,7 +135,7 @@
br i1 %16, label %vector.body, label %middle.block

middle.block: ; preds = %vector.body
%17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
%17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -182,7 +182,7 @@
br i1 %16, label %vector.body, label %middle.block

middle.block: ; preds = %vector.body
%17 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %14)
%17 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %14)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -228,7 +228,7 @@
br i1 %14, label %vector.body, label %middle.block

middle.block: ; preds = %vector.body
%15 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %12)
%15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %12)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -274,7 +274,7 @@
br i1 %14, label %vector.body, label %middle.block

middle.block: ; preds = %vector.body
%15 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %12)
%15 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %12)
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %middle.block, %entry
@@ -285,7 +285,7 @@
declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)
declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare void @llvm.set.loop.iterations.i32(i32)
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)