diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll b/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll new file mode 100644 index 0000000000000..e0ea2ab10d239 --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/reduce-bit.ll @@ -0,0 +1,217 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8 +; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI + +define void @and() { +; CHECK-V8-LABEL: 'and' +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEI-LABEL: 'and' +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 524 for instruction: %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1036 for instruction: %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1294 for instruction: %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %v1i64 = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> undef) + %v2i64 = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> undef) + %v4i64 = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> undef) + %v2i32 = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> undef) + %v4i32 = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> undef) + %v8i32 = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> undef) + %v2i16 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> undef) + %v4i16 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> undef) + %v8i16 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> undef) + %v16i16 = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> undef) + %v2i8 = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> undef) + %v4i8 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> undef) + %v8i8 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> undef) + %v16i8 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> undef) + %v32i8 = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> undef) + ret void +} + +define void @or() { +; CHECK-V8-LABEL: 'or' +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEI-LABEL: 'or' +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 524 for instruction: %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1036 for instruction: %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1294 for instruction: %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %v1i64 = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> undef) + %v2i64 = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> undef) + %v4i64 = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> undef) + %v2i32 = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> undef) + %v4i32 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> undef) + %v8i32 = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> undef) + %v2i16 = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> undef) + %v4i16 = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> undef) + %v8i16 = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> undef) + %v16i16 = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> undef) + %v2i8 = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> undef) + %v4i8 = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> undef) + %v8i8 = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> undef) + %v16i8 = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> undef) + %v32i8 = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> undef) + ret void +} + +define void @xor() { +; CHECK-V8-LABEL: 'xor' +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEI-LABEL: 'xor' +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 524 for instruction: %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1036 for instruction: %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 1294 for instruction: %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %v1i64 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef) + %v2i64 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef) + %v4i64 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef) + %v2i32 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef) + %v4i32 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef) + %v8i32 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef) + %v2i16 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef) + %v4i16 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef) + %v8i16 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef) + %v16i16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef) + %v2i8 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef) + %v4i8 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef) + %v8i8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef) + %v16i8 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef) + %v32i8 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef) + ret void +} + +declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.and.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.or.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>) +declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>) +declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>) +declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>) +declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>) +declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>) +declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>) +declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>) +declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>) +declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>) +declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.and.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.or.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.or.v2i8(<2 x i8>) +declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>) +declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>) +declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>) +declare i8 @llvm.vector.reduce.xor.v2i8(<2 x i8>) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll new file mode 100644 index 0000000000000..14b27062eebb6 --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/reduce-fminmax.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8 +; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEFP +; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +define void @fmin_strict() { +; CHECK-V8-LABEL: 'fmin_strict' +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'fmin_strict' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEI-LABEL: 'fmin_strict' +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 409 for instruction: %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 545 for instruction: %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %fmin_v2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) + %fmin_v4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) + %fmin_v8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) + %fmin_v16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) + %fmin_v2f32 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) + %fmin_v4f32 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) + %fmin_v8f32 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) + %fmin_v2f64 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) + %fmin_v4f64 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) + %fmin_v4f128 = call fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) + ret void +} + + +define void @fmin_unordered() { +; CHECK-V8-LABEL: 'fmin_unordered' +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'fmin_unordered' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEI-LABEL: 'fmin_unordered' +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 409 for instruction: %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 545 for instruction: %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %fmin_v2f16 = call reassoc half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) + %fmin_v4f16 = call reassoc half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) + %fmin_v8f16 = call reassoc half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) + %fmin_v16f16 = call reassoc half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) + %fmin_v2f32 = call reassoc float @llvm.vector.reduce.fmin.v2f32(<2 x float> undef) + %fmin_v4f32 = call reassoc float @llvm.vector.reduce.fmin.v4f32(<4 x float> undef) + %fmin_v8f32 = call reassoc float @llvm.vector.reduce.fmin.v8f32(<8 x float> undef) + %fmin_v2f64 = call reassoc double @llvm.vector.reduce.fmin.v2f64(<2 x double> undef) + %fmin_v4f64 = call reassoc double @llvm.vector.reduce.fmin.v4f64(<4 x double> undef) + %fmin_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128> undef) + ret void +} + +define void @fmax_strict() { +; CHECK-V8-LABEL: 'fmax_strict' +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'fmax_strict' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEI-LABEL: 'fmax_strict' +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 409 for instruction: %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 545 for instruction: %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %fmax_v2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) + %fmax_v4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) + %fmax_v8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) + %fmax_v16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) + %fmax_v2f32 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) + %fmax_v4f32 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) + %fmax_v8f32 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) + %fmax_v2f64 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) + %fmax_v4f64 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) + %fmax_v4f128 = call fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) + ret void +} + + +define void @fmax_unordered() { +; CHECK-V8-LABEL: 'fmax_unordered' +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'fmax_unordered' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEI-LABEL: 'fmax_unordered' +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 409 for instruction: %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 545 for instruction: %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %fmax_v2f16 = call reassoc half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) + %fmax_v4f16 = call reassoc half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) + %fmax_v8f16 = call reassoc half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) + %fmax_v16f16 = call reassoc half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) + %fmax_v2f32 = call reassoc float @llvm.vector.reduce.fmax.v2f32(<2 x float> undef) + %fmax_v4f32 = call reassoc float @llvm.vector.reduce.fmax.v4f32(<4 x float> undef) + %fmax_v8f32 = call reassoc float @llvm.vector.reduce.fmax.v8f32(<8 x float> undef) + %fmax_v2f64 = call reassoc double @llvm.vector.reduce.fmax.v2f64(<2 x double> undef) + %fmax_v4f64 = call reassoc double @llvm.vector.reduce.fmax.v4f64(<4 x double> undef) + %fmax_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128> undef) + ret void +} + + +declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>) +declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>) +declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>) +declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>) +declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) +declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) +declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) +declare fp128 @llvm.vector.reduce.fmin.v4f128(<4 x fp128>) + + +declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>) +declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>) +declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>) +declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>) +declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>) +declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) +declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) +declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) +declare fp128 @llvm.vector.reduce.fmax.v4f128(<4 x fp128>) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-fp.ll b/llvm/test/Analysis/CostModel/ARM/reduce-fp.ll new file mode 100644 index 0000000000000..26ee8155f17ba --- /dev/null +++ b/llvm/test/Analysis/CostModel/ARM/reduce-fp.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -mtriple=armv8a-linux-gnueabihf -mattr=+fp64 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-V8 +; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEFP +; RUN: opt < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefix=CHECK-MVEI + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +define void @fadd_strict() { +; CHECK-V8-LABEL: 'fadd_strict' +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'fadd_strict' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEI-LABEL: 'fadd_strict' +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %fadd_v2f16 = call half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) + %fadd_v4f16 = call half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) + %fadd_v8f16 = call half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) + %fadd_v16f16 = call half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) + %fadd_v2f32 = call float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) + %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) + %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) + %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) + %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) + %fadd_v4f128 = call fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) + ret void +} + + +define void @fadd_unordered() { +; CHECK-V8-LABEL: 'fadd_unordered' +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'fadd_unordered' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEI-LABEL: 'fadd_unordered' +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 169 for instruction: %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 225 for instruction: %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %fadd_v2f16 = call reassoc half @llvm.vector.reduce.fadd.v2f16(half 0.0, <2 x half> undef) + %fadd_v4f16 = call reassoc half @llvm.vector.reduce.fadd.v4f16(half 0.0, <4 x half> undef) + %fadd_v8f16 = call reassoc half @llvm.vector.reduce.fadd.v8f16(half 0.0, <8 x half> undef) + %fadd_v16f16 = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0.0, <16 x half> undef) + %fadd_v2f32 = call reassoc float @llvm.vector.reduce.fadd.v2f32(float 0.0, <2 x float> undef) + %fadd_v4f32 = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef) + %fadd_v8f32 = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef) + %fadd_v2f64 = call reassoc double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef) + %fadd_v4f64 = call reassoc double @llvm.vector.reduce.fadd.v4f64(double 0.0, <4 x double> undef) + %fadd_v4f128 = call reassoc fp128 @llvm.vector.reduce.fadd.v4f128(fp128 undef, <4 x fp128> undef) + ret void +} + +define void @fmul_strict() { +; CHECK-V8-LABEL: 'fmul_strict' +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'fmul_strict' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEI-LABEL: 'fmul_strict' +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %fmul_v2f16 = call half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef) + %fmul_v4f16 = call half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef) + %fmul_v8f16 = call half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef) + %fmul_v16f16 = call half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef) + %fmul_v2f32 = call float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef) + %fmul_v4f32 = call float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef) + %fmul_v8f32 = call float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef) + %fmul_v2f64 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef) + %fmul_v4f64 = call double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef) + %fmul_v4f128 = call fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) + ret void +} + + +define void @fmul_unordered() { +; CHECK-V8-LABEL: 'fmul_unordered' +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-V8-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEFP-LABEL: 'fmul_unordered' +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-MVEI-LABEL: 'fmul_unordered' +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0xH0000, <2 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0xH0000, <4 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 169 for instruction: %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0xH0000, <8 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 225 for instruction: %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0xH0000, <16 x half> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.000000e+00, <2 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.000000e+00, <4 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.000000e+00, <4 x double> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) +; CHECK-MVEI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %fmul_v2f16 = call reassoc half @llvm.vector.reduce.fmul.v2f16(half 0.0, <2 x half> undef) + %fmul_v4f16 = call reassoc half @llvm.vector.reduce.fmul.v4f16(half 0.0, <4 x half> undef) + %fmul_v8f16 = call reassoc half @llvm.vector.reduce.fmul.v8f16(half 0.0, <8 x half> undef) + %fmul_v16f16 = call reassoc half @llvm.vector.reduce.fmul.v16f16(half 0.0, <16 x half> undef) + %fmul_v2f32 = call reassoc float @llvm.vector.reduce.fmul.v2f32(float 0.0, <2 x float> undef) + %fmul_v4f32 = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 0.0, <4 x float> undef) + %fmul_v8f32 = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.0, <8 x float> undef) + %fmul_v2f64 = call reassoc double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> undef) + %fmul_v4f64 = call reassoc double @llvm.vector.reduce.fmul.v4f64(double 0.0, <4 x double> undef) + %fmul_v4f128 = call reassoc fp128 @llvm.vector.reduce.fmul.v4f128(fp128 undef, <4 x fp128> undef) + ret void +} + + +declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>) +declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>) +declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>) +declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>) +declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>) +declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) +declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) +declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>) +declare fp128 @llvm.vector.reduce.fadd.v4f128(fp128, <4 x fp128>) + + +declare half @llvm.vector.reduce.fmul.v2f16(half, <2 x half>) +declare half @llvm.vector.reduce.fmul.v4f16(half, <4 x half>) +declare half @llvm.vector.reduce.fmul.v8f16(half, <8 x half>) +declare half @llvm.vector.reduce.fmul.v16f16(half, <16 x half>) +declare float @llvm.vector.reduce.fmul.v2f32(float, <2 x float>) +declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>) +declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>) +declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>) +declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>) +declare fp128 @llvm.vector.reduce.fmul.v4f128(fp128, <4 x fp128>)