diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-add.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-add.ll new file mode 100644 index 0000000000000..8bbb96f20dc9a --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-add.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s + +define void @reduce() { +; CHECK-LABEL: 'reduce' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1i8 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V3i8 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V1i8 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef) + %V3i8 = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> undef) + %V4i8 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) + %V8i8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) + %V16i8 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) + %V32i8 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef) + %V64i8 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef) + %V2i16 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) + %V4i16 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) + %V8i16 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) + %V16i16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) + %V2i32 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) + %V4i32 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) + %V8i32 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) + %V2i64 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) + %V4i64 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) + ret void +} + +declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.add.v3i8(<3 x i8>) +declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>) +declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) +declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll index 6d2fdaf573136..07f5b7fa0d54e 100644 --- a/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-and.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s -define i32 @reduce_i1(i32 %arg) { -; CHECK-LABEL: 'reduce_i1' +define void @reduce() { +; CHECK-LABEL: 'reduce' ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> undef) @@ -26,7 +26,7 @@ define i32 @reduce_i1(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1 = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> undef) @@ -53,7 +53,7 @@ define i32 @reduce_i1(i32 %arg) { %V8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) %V2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) %V4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) - ret i32 undef + ret void } declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>) diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll new file mode 100644 index 0000000000000..a8c75a9c92221 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll @@ -0,0 +1,296 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s + +define void @reduce_umin() { +; CHECK-LABEL: 'reduce_umin' +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V1i8 = call i8 @llvm.vector.reduce.umin.v1i8(<1 x i8> undef) + %V3i8 = call i8 @llvm.vector.reduce.umin.v3i8(<3 x i8> undef) + %V4i8 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) + %V8i8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) + %V16i8 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) + %V32i8 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) + %V64i8 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) + %V2i16 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) + %V4i16 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) + %V8i16 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) + %V16i16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) + %V2i32 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) + %V4i32 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) + %V8i32 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) + %V2i64 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) + %V4i64 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) + ret void +} + +define void @reduce_umax() { +; CHECK-LABEL: 'reduce_umax' +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V1i8 = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> undef) + %V3i8 = call i8 @llvm.vector.reduce.umax.v3i8(<3 x i8> undef) + %V4i8 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) + %V8i8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) + %V16i8 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) + %V32i8 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) + %V64i8 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) + %V2i16 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) + %V4i16 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) + %V8i16 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) + %V16i16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) + %V2i32 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) + %V4i32 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) + %V8i32 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) + %V2i64 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) + %V4i64 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) + ret void +} + +define void @reduce_smin() { +; CHECK-LABEL: 'reduce_smin' +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V1i8 = call i8 @llvm.vector.reduce.smin.v1i8(<1 x i8> undef) + %V3i8 = call i8 @llvm.vector.reduce.smin.v3i8(<3 x i8> undef) + %V4i8 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) + %V8i8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) + %V16i8 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) + %V32i8 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) + %V64i8 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) + %V2i16 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) + %V4i16 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) + %V8i16 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) + %V16i16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) + %V2i32 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) + %V4i32 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) + %V8i32 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) + %V2i64 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) + %V4i64 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) + ret void +} + +define void @reduce_smax() { +; CHECK-LABEL: 'reduce_smax' +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 760 for instruction: %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V1i8 = call i8 @llvm.vector.reduce.smax.v1i8(<1 x i8> undef) + %V3i8 = call i8 @llvm.vector.reduce.smax.v3i8(<3 x i8> undef) + %V4i8 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) + %V8i8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) + %V16i8 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) + %V32i8 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) + %V64i8 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) + %V2i16 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) + %V4i16 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) + %V8i16 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) + %V16i16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) + %V2i32 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) + %V4i32 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) + %V8i32 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) + %V2i64 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) + %V4i64 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) + ret void +} + +define void @reduce_fmin() { +; CHECK-LABEL: 'reduce_fmin' +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 300 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) + %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) + %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) + %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) + %V2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) + %V4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) + %V8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) + %V2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) + %V4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) + ret void +} + +define void @reduce_fmax() { +; CHECK-LABEL: 'reduce_fmax' +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 300 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) + %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) + %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) + %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) + %V2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) + %V4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) + %V8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) + %V2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) + %V4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) + ret void +} + +declare i8 @llvm.vector.reduce.umin.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.umin.v3i8(<3 x i8>) +declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>) +declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>) +declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>) + +declare i8 @llvm.vector.reduce.umax.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.umax.v3i8(<3 x i8>) +declare i8 @llvm.vector.reduce.umax.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.umax.v64i8(<64 x i8>) +declare i16 @llvm.vector.reduce.umax.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>) +declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>) + +declare i8 @llvm.vector.reduce.smin.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>) +declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.smin.v64i8(<64 x i8>) +declare i16 @llvm.vector.reduce.smin.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>) +declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>) + +declare i8 @llvm.vector.reduce.smax.v1i8(<1 x i8>) +declare i8 @llvm.vector.reduce.smax.v3i8(<3 x i8>) +declare i8 @llvm.vector.reduce.smax.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.smax.v64i8(<64 x i8>) +declare i16 @llvm.vector.reduce.smax.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>) +declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>) + +declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>) +declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>) +declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>) +declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>) +declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) +declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) +declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) + +declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>) +declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>) +declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>) +declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>) +declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>) +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) +declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll index 82068c736533d..1492fa4364bc6 100644 --- a/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-or.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s -define i32 @reduce_i1(i32 %arg) { -; CHECK-LABEL: 'reduce_i1' +define void @reduce() { +; CHECK-LABEL: 'reduce' ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> undef) @@ -26,7 +26,7 @@ define i32 @reduce_i1(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1 = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> undef) @@ -53,7 +53,7 @@ define i32 @reduce_i1(i32 %arg) { %V8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) %V2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) %V4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) - ret i32 undef + ret void } declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>) diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll index cebcc3aace493..ad8fdb5e0a736 100644 --- a/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-xor.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -cost-kind=throughput -analyze | FileCheck %s -define i32 @reduce_i1(i32 %arg) { -; CHECK-LABEL: 'reduce_i1' +define void @reduce() { +; CHECK-LABEL: 'reduce' ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef) @@ -26,7 +26,7 @@ define i32 @reduce_i1(i32 %arg) { ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef) @@ -53,7 +53,7 @@ define i32 @reduce_i1(i32 %arg) { %V8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef) %V2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef) %V4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef) - ret i32 undef + ret void } declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1>) diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll new file mode 100644 index 0000000000000..646406ed5114e --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s + +define void @shuffle() { +; CHECK-LABEL: 'shuffle' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v7 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %v8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> + %v9 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> + %v10 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + + %v11 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %v12 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> + %v13 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> + + %v14 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %v15 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> + + %v16 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> + %v17 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> + + ret void +} diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll index 3ddf90f12c2ef..df25a6fedf2b8 100644 --- a/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-reduce.ll @@ -1,245 +1,245 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefix=COST -; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE -; COST-LABEL: add.i8.v8i8 -; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v) -; CODE-LABEL: add.i8.v8i8 -; CODE: addv b0, v0.8b define i8 @add.i8.v8i8(<8 x i8> %v) { +; COST-LABEL: 'add.i8.v8i8' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r +; %r = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v) ret i8 %r } -; COST-LABEL: add.i8.v16i8 -; COST: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v) -; CODE-LABEL: add.i8.v16i8 -; CODE: addv b0, v0.16b define i8 @add.i8.v16i8(<16 x i8> %v) { +; COST-LABEL: 'add.i8.v16i8' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r +; %r = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v) ret i8 %r } -; COST-LABEL: add.i16.v4i16 -; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v) -; CODE-LABEL: add.i16.v4i16 -; CODE: addv h0, v0.4h define i16 @add.i16.v4i16(<4 x i16> %v) { +; COST-LABEL: 'add.i16.v4i16' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; %r = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v) ret i16 %r } -; COST-LABEL: add.i16.v8i16 -; COST: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v) -; CODE-LABEL: add.i16.v8i16 -; CODE: addv h0, v0.8h define i16 @add.i16.v8i16(<8 x i16> %v) { +; COST-LABEL: 'add.i16.v8i16' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; %r = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v) ret i16 %r } -; COST-LABEL: add.i32.v4i32 -; COST: Found an estimated cost of 1 for instruction: %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v) -; CODE-LABEL: add.i32.v4i32 -; CODE: addv s0, v0.4s define i32 @add.i32.v4i32(<4 x i32> %v) { +; COST-LABEL: 'add.i32.v4i32' +; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v) ret i32 %r } -; COST-LABEL: umin.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v) -; CODE-LABEL: umin.i8.v8i8 -; CODE: uminv b0, v0.8b define i8 @umin.i8.v8i8(<8 x i8> %v) { +; COST-LABEL: 'umin.i8.v8i8' +; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r +; %r = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v) ret i8 %r } -; COST-LABEL: umin.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v) -; CODE-LABEL: umin.i8.v16i8 -; CODE: uminv b0, v0.16b define i8 @umin.i8.v16i8(<16 x i8> %v) { +; COST-LABEL: 'umin.i8.v16i8' +; COST-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r +; %r = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %v) ret i8 %r } -; COST-LABEL: umin.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v) -; CODE-LABEL: umin.i16.v4i16 -; CODE: uminv h0, v0.4h define i16 @umin.i16.v4i16(<4 x i16> %v) { +; COST-LABEL: 'umin.i16.v4i16' +; COST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; %r = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v) ret i16 %r } -; COST-LABEL: umin.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v) -; CODE-LABEL: umin.i16.v8i16 -; CODE: uminv h0, v0.8h define i16 @umin.i16.v8i16(<8 x i16> %v) { +; COST-LABEL: 'umin.i16.v8i16' +; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; %r = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %v) ret i16 %r } -; COST-LABEL: umin.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v) -; CODE-LABEL: umin.i32.v4i32 -; CODE: uminv s0, v0.4s define i32 @umin.i32.v4i32(<4 x i32> %v) { +; COST-LABEL: 'umin.i32.v4i32' +; COST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; %r = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v) ret i32 %r } -; COST-LABEL: umax.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v) -; CODE-LABEL: umax.i8.v8i8 -; CODE: umaxv b0, v0.8b define i8 @umax.i8.v8i8(<8 x i8> %v) { +; COST-LABEL: 'umax.i8.v8i8' +; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r +; %r = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v) ret i8 %r } -; COST-LABEL: umax.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v) -; CODE-LABEL: umax.i8.v16i8 -; CODE: umaxv b0, v0.16b define i8 @umax.i8.v16i8(<16 x i8> %v) { +; COST-LABEL: 'umax.i8.v16i8' +; COST-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r +; %r = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %v) ret i8 %r } -; COST-LABEL: umax.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v) -; CODE-LABEL: umax.i16.v4i16 -; CODE: umaxv h0, v0.4h define i16 @umax.i16.v4i16(<4 x i16> %v) { +; COST-LABEL: 'umax.i16.v4i16' +; COST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; %r = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v) ret i16 %r } -; COST-LABEL: umax.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v) -; CODE-LABEL: umax.i16.v8i16 -; CODE: umaxv h0, v0.8h define i16 @umax.i16.v8i16(<8 x i16> %v) { +; COST-LABEL: 'umax.i16.v8i16' +; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; %r = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %v) ret i16 %r } -; COST-LABEL: umax.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v) -; CODE-LABEL: umax.i32.v4i32 -; CODE: umaxv s0, v0.4s define i32 @umax.i32.v4i32(<4 x i32> %v) { +; COST-LABEL: 'umax.i32.v4i32' +; COST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v) ret i32 %r } -; COST-LABEL: smin.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v) -; CODE-LABEL: smin.i8.v8i8 -; CODE: sminv b0, v0.8b define i8 @smin.i8.v8i8(<8 x i8> %v) { +; COST-LABEL: 'smin.i8.v8i8' +; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r +; %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v) ret i8 %r } -; COST-LABEL: smin.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v) -; CODE-LABEL: smin.i8.v16i8 -; CODE: sminv b0, v0.16b define i8 @smin.i8.v16i8(<16 x i8> %v) { +; COST-LABEL: 'smin.i8.v16i8' +; COST-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r +; %r = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %v) ret i8 %r } -; COST-LABEL: smin.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v) -; CODE-LABEL: smin.i16.v4i16 -; CODE: sminv h0, v0.4h define i16 @smin.i16.v4i16(<4 x i16> %v) { +; COST-LABEL: 'smin.i16.v4i16' +; COST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; %r = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v) ret i16 %r } -; COST-LABEL: smin.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v) -; CODE-LABEL: smin.i16.v8i16 -; CODE: sminv h0, v0.8h define i16 @smin.i16.v8i16(<8 x i16> %v) { +; COST-LABEL: 'smin.i16.v8i16' +; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; %r = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %v) ret i16 %r } -; COST-LABEL: smin.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v) -; CODE-LABEL: smin.i32.v4i32 -; CODE: sminv s0, v0.4s define i32 @smin.i32.v4i32(<4 x i32> %v) { +; COST-LABEL: 'smin.i32.v4i32' +; COST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; %r = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v) ret i32 %r } -; COST-LABEL: smax.i8.v8i8 -; COST: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v) -; CODE-LABEL: smax.i8.v8i8 -; CODE: smaxv b0, v0.8b define i8 @smax.i8.v8i8(<8 x i8> %v) { +; COST-LABEL: 'smax.i8.v8i8' +; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r +; %r = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v) ret i8 %r } -; COST-LABEL: smax.i8.v16i8 -; COST: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v) -; CODE-LABEL: smax.i8.v16i8 -; CODE: smaxv b0, v0.16b define i8 @smax.i8.v16i8(<16 x i8> %v) { +; COST-LABEL: 'smax.i8.v16i8' +; COST-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %r +; %r = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %v) ret i8 %r } -; COST-LABEL: smax.i16.v4i16 -; COST: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v) -; CODE-LABEL: smax.i16.v4i16 -; CODE: smaxv h0, v0.4h define i16 @smax.i16.v4i16(<4 x i16> %v) { +; COST-LABEL: 'smax.i16.v4i16' +; COST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; %r = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v) ret i16 %r } -; COST-LABEL: smax.i16.v8i16 -; COST: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v) -; CODE-LABEL: smax.i16.v8i16 -; CODE: smaxv h0, v0.8h define i16 @smax.i16.v8i16(<8 x i16> %v) { +; COST-LABEL: 'smax.i16.v8i16' +; COST-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %r +; %r = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %v) ret i16 %r } -; COST-LABEL: smax.i32.v4i32 -; COST: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v) -; CODE-LABEL: smax.i32.v4i32 -; CODE: smaxv s0, v0.4s define i32 @smax.i32.v4i32(<4 x i32> %v) { +; COST-LABEL: 'smax.i32.v4i32' +; COST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %r +; %r = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v) ret i32 %r } -; COST-LABEL: fmin.f32.v4f32 -; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) -; CODE-LABEL: fmin.f32.v4f32 -; CODE: fminnmv s0, v0.4s define float @fmin.f32.v4f32(<4 x float> %v) { +; COST-LABEL: 'fmin.f32.v4f32' +; COST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r +; %r = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v) ret float %r } -; COST-LABEL: fmax.f32.v4f32 -; COST: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) -; CODE-LABEL: fmax.f32.v4f32 -; CODE: fmaxnmv s0, v0.4s define float @fmax.f32.v4f32(<4 x float> %v) { +; COST-LABEL: 'fmax.f32.v4f32' +; COST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) +; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %r +; %r = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v) ret float %r }