diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 4761a502026cd..c27a0e5c22859 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -817,13 +817,12 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement || Opcode == Instruction::ExtractElement)) { - // We say MVE moves costs at least the MVEVectorCostFactor, even though - // they are scalar instructions. This helps prevent mixing scalar and - // vector, to prevent vectorising where we end up just scalarising the - // result anyway. - return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), - ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput)) * - cast(ValTy)->getNumElements() / 2; + // Integer cross-lane moves are more expensive than float, which can + // sometimes just be vmovs. Integer involve being passes to GPR registers, + // causing more of a delay. + std::pair LT = + getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType()); + return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1); } return BaseT::getVectorInstrCost(Opcode, ValTy, Index); diff --git a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll index ce052b3398e51..5a74d68f8a736 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-overflow.ll @@ -67,21 +67,21 @@ define i32 @sadd(i32 %arg) { ; ; MVE-RECIP-LABEL: 'sadd' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 242 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 866 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 370 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 738 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1890 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 546 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1874 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7332 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 530 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1060 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1098 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7316 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28968 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'sadd' @@ -124,21 +124,21 @@ define i32 @sadd(i32 %arg) { ; ; MVE-SIZE-LABEL: 'sadd' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 787 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 779 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3092 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3084 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12310 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 790 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) @@ -225,21 +225,21 @@ define i32 @uadd(i32 %arg) { ; ; MVE-RECIP-LABEL: 'uadd' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1040 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4112 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'uadd' @@ -282,21 +282,21 @@ define i32 @uadd(i32 %arg) { ; ; MVE-SIZE-LABEL: 'uadd' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1028 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4104 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) @@ -383,21 +383,21 @@ define i32 @ssub(i32 %arg) { ; ; MVE-RECIP-LABEL: 'ssub' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 242 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 866 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 370 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 738 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 498 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1890 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 546 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1874 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7332 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 530 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1060 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1098 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7316 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28968 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'ssub' @@ -440,21 +440,21 @@ define i32 @ssub(i32 %arg) { ; ; MVE-SIZE-LABEL: 'ssub' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 787 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 779 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3092 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3084 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12310 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 790 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) @@ -541,21 +541,21 @@ define i32 @usub(i32 %arg) { ; ; MVE-RECIP-LABEL: 'usub' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1040 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4112 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'usub' @@ -598,21 +598,21 @@ define i32 @usub(i32 %arg) { ; ; MVE-SIZE-LABEL: 'usub' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1032 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1028 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4104 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef) @@ -699,21 +699,21 @@ define i32 @smul(i32 %arg) { ; ; MVE-RECIP-LABEL: 'smul' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2112 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 380 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1464 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 348 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1016 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1512 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 616 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1252 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4712 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 356 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'smul' @@ -756,21 +756,21 @@ define i32 @smul(i32 %arg) { ; ; MVE-SIZE-LABEL: 'smul' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 325 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 332 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1174 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1164 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4374 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 534 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef) @@ -857,21 +857,21 @@ define i32 @umul(i32 %arg) { ; ; MVE-RECIP-LABEL: 'umul' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1456 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 344 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1504 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 608 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1248 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4704 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 864 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; V8M-SIZE-LABEL: 'umul' @@ -914,21 +914,21 @@ define i32 @umul(i32 %arg) { ; ; MVE-SIZE-LABEL: 'umul' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 812 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1170 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1162 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4370 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 530 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll index 251392af24c70..2aefb3a8fd597 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-ssat.ll @@ -85,22 +85,22 @@ define i32 @add(i32 %arg) { ; ; MVE-RECIP-LABEL: 'add' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 378 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1330 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 602 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1202 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -160,22 +160,22 @@ define i32 @add(i32 %arg) { ; ; MVE-SIZE-LABEL: 'add' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = call i64 @llvm.sadd.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V2I64 = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V4I64 = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8I64 = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I32 = call i32 @llvm.sadd.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2I32 = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -291,22 +291,22 @@ define i32 @sub(i32 %arg) { ; ; MVE-RECIP-LABEL: 'sub' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 378 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1330 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 302 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 602 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1202 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -366,22 +366,22 @@ define i32 @sub(i32 %arg) { ; ; MVE-SIZE-LABEL: 'sub' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = call i64 @llvm.ssub.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V2I64 = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V4I64 = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8I64 = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I32 = call i32 @llvm.ssub.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2I32 = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll index 342956cb83e88..9baec1e7d25f0 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith-usat.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith-usat.ll @@ -85,22 +85,22 @@ define i32 @add(i32 %arg) { ; ; MVE-RECIP-LABEL: 'add' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -160,22 +160,22 @@ define i32 @add(i32 %arg) { ; ; MVE-SIZE-LABEL: 'add' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = call i64 @llvm.uadd.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V8I64 = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.uadd.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -291,22 +291,22 @@ define i32 @sub(i32 %arg) { ; ; MVE-RECIP-LABEL: 'sub' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -366,22 +366,22 @@ define i32 @sub(i32 %arg) { ; ; MVE-SIZE-LABEL: 'sub' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = call i64 @llvm.usub.sat.i64(i64 undef, i64 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V8I64 = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.usub.sat.i32(i32 undef, i32 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> undef, <16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/arith.ll b/llvm/test/Analysis/CostModel/ARM/arith.ll index cbeaa8e97f065..f1f11e2484aca 100644 --- a/llvm/test/Analysis/CostModel/ARM/arith.ll +++ b/llvm/test/Analysis/CostModel/ARM/arith.ll @@ -375,12 +375,12 @@ define void @i64() { define void @vi8() { ; CHECK-MVE1-LABEL: 'vi8' -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = sub <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = mul <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = ashr <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g2 = lshr <2 x i8> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h2 = shl <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i8> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i8> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i8> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i8> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i8> undef, undef @@ -414,12 +414,12 @@ define void @vi8() { ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE2-LABEL: 'vi8' -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i8> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i8> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i8> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i8> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i8> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i8> undef, undef @@ -609,12 +609,12 @@ define void @vi8() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-SIZE-LABEL: 'vi8' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i8> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i8> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i8> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i8> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i8> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i8> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i8> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i8> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i8> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i8> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i8> undef, undef @@ -688,12 +688,12 @@ define void @vi8() { define void @vi16() { ; CHECK-MVE1-LABEL: 'vi16' -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = sub <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = mul <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = ashr <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g2 = lshr <2 x i16> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h2 = shl <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i16> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i16> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i16> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i16> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i16> undef, undef @@ -727,12 +727,12 @@ define void @vi16() { ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE2-LABEL: 'vi16' -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i16> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i16> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i16> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i16> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i16> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i16> undef, undef @@ -922,12 +922,12 @@ define void @vi16() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-SIZE-LABEL: 'vi16' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i16> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i16> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i16> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i16> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i16> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i16> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i16> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i16> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i16> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i16> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i16> undef, undef @@ -1001,12 +1001,12 @@ define void @vi16() { define void @vi32() { ; CHECK-MVE1-LABEL: 'vi32' -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = add <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = sub <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = mul <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f2 = ashr <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %g2 = lshr <2 x i32> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %h2 = shl <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i32> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i32> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i32> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i32> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i32> undef, undef @@ -1040,12 +1040,12 @@ define void @vi32() { ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE2-LABEL: 'vi32' -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i32> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i32> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i32> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i32> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i32> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i32> undef, undef @@ -1235,12 +1235,12 @@ define void @vi32() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-SIZE-LABEL: 'vi32' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = add <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = sub <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = mul <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f2 = ashr <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %g2 = lshr <2 x i32> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %h2 = shl <2 x i32> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %c2 = add <2 x i32> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %d2 = sub <2 x i32> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %e2 = mul <2 x i32> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f2 = ashr <2 x i32> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %g2 = lshr <2 x i32> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %h2 = shl <2 x i32> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i32> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i32> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i32> undef, undef @@ -1314,21 +1314,21 @@ define void @vi32() { define void @vi64() { ; CHECK-MVE1-LABEL: 'vi64' -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c2 = add <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d2 = sub <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = mul <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = ashr <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g2 = lshr <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h2 = shl <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c2 = add <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d2 = sub <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e2 = mul <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = ashr <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %g2 = lshr <2 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %h2 = shl <2 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %c4 = add <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %d4 = sub <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = mul <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f4 = ashr <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %g4 = lshr <4 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %h4 = shl <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %c4 = add <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %d4 = sub <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = mul <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f4 = ashr <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %g4 = lshr <4 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %h4 = shl <4 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i64> undef, undef @@ -1341,33 +1341,33 @@ define void @vi64() { ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %c16 = add <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %d16 = sub <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %e16 = mul <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %f16 = ashr <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %g16 = lshr <16 x i64> undef, undef -; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %h16 = shl <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %c16 = add <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %d16 = sub <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e16 = mul <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %f16 = ashr <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %g16 = lshr <16 x i64> undef, undef +; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %h16 = shl <16 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef ; CHECK-MVE1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE2-LABEL: 'vi64' -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c2 = add <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d2 = sub <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = mul <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = ashr <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g2 = lshr <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h2 = shl <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c2 = add <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d2 = sub <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e2 = mul <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = ashr <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %g2 = lshr <2 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %h2 = shl <2 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i2 = and <2 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j2 = or <2 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k2 = xor <2 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %c4 = add <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %d4 = sub <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = mul <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f4 = ashr <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %g4 = lshr <4 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %h4 = shl <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %c4 = add <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %d4 = sub <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = mul <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f4 = ashr <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %g4 = lshr <4 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %h4 = shl <4 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i4 = and <4 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j4 = or <4 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k4 = xor <4 x i64> undef, undef @@ -1380,24 +1380,24 @@ define void @vi64() { ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i8 = and <8 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j8 = or <8 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k8 = xor <8 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %c16 = add <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %d16 = sub <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %e16 = mul <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %f16 = ashr <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %g16 = lshr <16 x i64> undef, undef -; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %h16 = shl <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %c16 = add <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %d16 = sub <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e16 = mul <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %f16 = ashr <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %g16 = lshr <16 x i64> undef, undef +; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %h16 = shl <16 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i16 = and <16 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j16 = or <16 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k16 = xor <16 x i64> undef, undef ; CHECK-MVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE4-LABEL: 'vi64' -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %c2 = add <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %d2 = sub <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %e2 = mul <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %f2 = ashr <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %g2 = lshr <2 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %h2 = shl <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c2 = add <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d2 = sub <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e2 = mul <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = ashr <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %g2 = lshr <2 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %h2 = shl <2 x i64> undef, undef ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i2 = and <2 x i64> undef, undef ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j2 = or <2 x i64> undef, undef ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k2 = xor <2 x i64> undef, undef @@ -1410,21 +1410,21 @@ define void @vi64() { ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i4 = and <4 x i64> undef, undef ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j4 = or <4 x i64> undef, undef ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k4 = xor <4 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %c8 = add <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %d8 = sub <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %e8 = mul <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %f8 = ashr <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %g8 = lshr <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %h8 = shl <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %c8 = add <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %d8 = sub <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e8 = mul <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %f8 = ashr <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %g8 = lshr <8 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %h8 = shl <8 x i64> undef, undef ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %i8 = and <8 x i64> undef, undef ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %j8 = or <8 x i64> undef, undef ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %k8 = xor <8 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %c16 = add <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %d16 = sub <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %e16 = mul <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %f16 = ashr <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %g16 = lshr <16 x i64> undef, undef -; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %h16 = shl <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %c16 = add <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %d16 = sub <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e16 = mul <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %f16 = ashr <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %g16 = lshr <16 x i64> undef, undef +; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %h16 = shl <16 x i64> undef, undef ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %i16 = and <16 x i64> undef, undef ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %j16 = or <16 x i64> undef, undef ; CHECK-MVE4-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %k16 = xor <16 x i64> undef, undef @@ -1548,21 +1548,21 @@ define void @vi64() { ; CHECK-V8R-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-SIZE-LABEL: 'vi64' -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c2 = add <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d2 = sub <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e2 = mul <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %f2 = ashr <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %g2 = lshr <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %h2 = shl <2 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c2 = add <2 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d2 = sub <2 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e2 = mul <2 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = ashr <2 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %g2 = lshr <2 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %h2 = shl <2 x i64> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i2 = and <2 x i64> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %j2 = or <2 x i64> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %k2 = xor <2 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %c4 = add <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %d4 = sub <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %e4 = mul <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f4 = ashr <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %g4 = lshr <4 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %h4 = shl <4 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %c4 = add <4 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %d4 = sub <4 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = mul <4 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f4 = ashr <4 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %g4 = lshr <4 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %h4 = shl <4 x i64> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %i4 = and <4 x i64> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %j4 = or <4 x i64> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %k4 = xor <4 x i64> undef, undef @@ -1575,12 +1575,12 @@ define void @vi64() { ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %i8 = and <8 x i64> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %j8 = or <8 x i64> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %k8 = xor <8 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %c16 = add <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %d16 = sub <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %e16 = mul <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %f16 = ashr <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %g16 = lshr <16 x i64> undef, undef -; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %h16 = shl <16 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %c16 = add <16 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %d16 = sub <16 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %e16 = mul <16 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %f16 = ashr <16 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %g16 = lshr <16 x i64> undef, undef +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %h16 = shl <16 x i64> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %i16 = and <16 x i64> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %j16 = or <16 x i64> undef, undef ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %k16 = xor <16 x i64> undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll index f8377ad3f0c19..bf2488eec97b2 100644 --- a/llvm/test/Analysis/CostModel/ARM/cast.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast.ll @@ -557,36 +557,36 @@ define i32 @casts() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r87hf = fpext <4 x half> undef to <4 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r88hf = fpext <8 x half> undef to <8 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %r89hf = fpext <16 x half> undef to <16 x float> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r90h = fptoui <2 x half> undef to <2 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r91h = fptosi <2 x half> undef to <2 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r92h = fptoui <2 x half> undef to <2 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r93h = fptosi <2 x half> undef to <2 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r94h = fptoui <2 x half> undef to <2 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r95h = fptosi <2 x half> undef to <2 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r96h = fptoui <2 x half> undef to <2 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r97h = fptosi <2 x half> undef to <2 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r98h = fptoui <2 x half> undef to <2 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r99h = fptosi <2 x half> undef to <2 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r90 = fptoui <2 x float> undef to <2 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r91 = fptosi <2 x float> undef to <2 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r92 = fptoui <2 x float> undef to <2 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r93 = fptosi <2 x float> undef to <2 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r94 = fptoui <2 x float> undef to <2 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r95 = fptosi <2 x float> undef to <2 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r96 = fptoui <2 x float> undef to <2 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r97 = fptosi <2 x float> undef to <2 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %r98 = fptoui <2 x float> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %r99 = fptosi <2 x float> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r90h = fptoui <2 x half> undef to <2 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r91h = fptosi <2 x half> undef to <2 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r92h = fptoui <2 x half> undef to <2 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r93h = fptosi <2 x half> undef to <2 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r94h = fptoui <2 x half> undef to <2 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r95h = fptosi <2 x half> undef to <2 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r96h = fptoui <2 x half> undef to <2 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r97h = fptosi <2 x half> undef to <2 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %r98h = fptoui <2 x half> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %r99h = fptosi <2 x half> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r100 = fptoui <2 x double> undef to <2 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r101 = fptosi <2 x double> undef to <2 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r102 = fptoui <2 x double> undef to <2 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r103 = fptosi <2 x double> undef to <2 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r104 = fptoui <2 x double> undef to <2 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r105 = fptosi <2 x double> undef to <2 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r106 = fptoui <2 x double> undef to <2 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r107 = fptosi <2 x double> undef to <2 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %r108 = fptoui <2 x double> undef to <2 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %r109 = fptosi <2 x double> undef to <2 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r110 = fptoui <4 x float> undef to <4 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r111 = fptosi <4 x float> undef to <4 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r112 = fptoui <4 x float> undef to <4 x i8> @@ -595,8 +595,8 @@ define i32 @casts() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115 = fptosi <4 x float> undef to <4 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r116 = fptoui <4 x float> undef to <4 x i32> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r117 = fptosi <4 x float> undef to <4 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %r118 = fptoui <4 x float> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %r119 = fptosi <4 x float> undef to <4 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r110h = fptoui <4 x half> undef to <4 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r111h = fptosi <4 x half> undef to <4 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r112h = fptoui <4 x half> undef to <4 x i8> @@ -605,18 +605,18 @@ define i32 @casts() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r115h = fptosi <4 x half> undef to <4 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r116h = fptoui <4 x half> undef to <4 x i32> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r117h = fptosi <4 x half> undef to <4 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r118h = fptoui <4 x half> undef to <4 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r119h = fptosi <4 x half> undef to <4 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %r118h = fptoui <4 x half> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %r119h = fptosi <4 x half> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %r120 = fptoui <4 x double> undef to <4 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %r121 = fptosi <4 x double> undef to <4 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %r122 = fptoui <4 x double> undef to <4 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %r123 = fptosi <4 x double> undef to <4 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %r124 = fptoui <4 x double> undef to <4 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %r125 = fptosi <4 x double> undef to <4 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %r126 = fptoui <4 x double> undef to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %r127 = fptosi <4 x double> undef to <4 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %r128 = fptoui <4 x double> undef to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %r129 = fptosi <4 x double> undef to <4 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r130 = fptoui <8 x float> undef to <8 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r131 = fptosi <8 x float> undef to <8 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r132 = fptoui <8 x float> undef to <8 x i8> @@ -625,8 +625,8 @@ define i32 @casts() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r135 = fptosi <8 x float> undef to <8 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r136 = fptoui <8 x float> undef to <8 x i32> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r137 = fptosi <8 x float> undef to <8 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1096 for instruction: %r138 = fptoui <8 x float> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1096 for instruction: %r139 = fptosi <8 x float> undef to <8 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r130h = fptoui <8 x half> undef to <8 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r131h = fptosi <8 x half> undef to <8 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r132h = fptoui <8 x half> undef to <8 x i8> @@ -635,18 +635,18 @@ define i32 @casts() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r135h = fptosi <8 x half> undef to <8 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r136h = fptoui <8 x half> undef to <8 x i32> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r137h = fptosi <8 x half> undef to <8 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r138h = fptoui <8 x half> undef to <8 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r139h = fptosi <8 x half> undef to <8 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1098 for instruction: %r138h = fptoui <8 x half> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1098 for instruction: %r139h = fptosi <8 x half> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 586 for instruction: %r140 = fptoui <8 x double> undef to <8 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 586 for instruction: %r141 = fptosi <8 x double> undef to <8 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 586 for instruction: %r142 = fptoui <8 x double> undef to <8 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 586 for instruction: %r143 = fptosi <8 x double> undef to <8 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 586 for instruction: %r144 = fptoui <8 x double> undef to <8 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 586 for instruction: %r145 = fptosi <8 x double> undef to <8 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 584 for instruction: %r146 = fptoui <8 x double> undef to <8 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 584 for instruction: %r147 = fptosi <8 x double> undef to <8 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1088 for instruction: %r148 = fptoui <8 x double> undef to <8 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1088 for instruction: %r149 = fptosi <8 x double> undef to <8 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r150 = fptoui <16 x float> undef to <16 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r151 = fptosi <16 x float> undef to <16 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r152 = fptoui <16 x float> undef to <16 x i8> @@ -655,8 +655,8 @@ define i32 @casts() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r155 = fptosi <16 x float> undef to <16 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r156 = fptoui <16 x float> undef to <16 x i32> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r157 = fptosi <16 x float> undef to <16 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4384 for instruction: %r158 = fptoui <16 x float> undef to <16 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4384 for instruction: %r159 = fptosi <16 x float> undef to <16 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r150h = fptoui <16 x half> undef to <16 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r151h = fptosi <16 x half> undef to <16 x i1> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r152h = fptoui <16 x half> undef to <16 x i8> @@ -665,18 +665,18 @@ define i32 @casts() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r155h = fptosi <16 x half> undef to <16 x i16> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r156h = fptoui <16 x half> undef to <16 x i32> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r157h = fptosi <16 x half> undef to <16 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r158h = fptoui <16 x half> undef to <16 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r159h = fptosi <16 x half> undef to <16 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4392 for instruction: %r158h = fptoui <16 x half> undef to <16 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4392 for instruction: %r159h = fptosi <16 x half> undef to <16 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2346 for instruction: %r160 = fptoui <16 x double> undef to <16 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2346 for instruction: %r161 = fptosi <16 x double> undef to <16 x i1> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2346 for instruction: %r162 = fptoui <16 x double> undef to <16 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2346 for instruction: %r163 = fptosi <16 x double> undef to <16 x i8> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2344 for instruction: %r164 = fptoui <16 x double> undef to <16 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2344 for instruction: %r165 = fptosi <16 x double> undef to <16 x i16> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2336 for instruction: %r166 = fptoui <16 x double> undef to <16 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2336 for instruction: %r167 = fptosi <16 x double> undef to <16 x i32> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4352 for instruction: %r168 = fptoui <16 x double> undef to <16 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4352 for instruction: %r169 = fptosi <16 x double> undef to <16 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r170 = uitofp <2 x i1> undef to <2 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r171 = sitofp <2 x i1> undef to <2 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r172 = uitofp <2 x i8> undef to <2 x float> @@ -697,16 +697,16 @@ define i32 @casts() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r177h = sitofp <2 x i32> undef to <2 x half> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178h = uitofp <2 x i64> undef to <2 x half> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179h = sitofp <2 x i64> undef to <2 x half> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r183 = sitofp <2 x i8> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r184 = uitofp <2 x i16> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r185 = sitofp <2 x i16> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r186 = uitofp <2 x i32> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r187 = sitofp <2 x i32> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r188 = uitofp <2 x i64> undef to <2 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %r189 = sitofp <2 x i64> undef to <2 x double> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r190 = uitofp <4 x i1> undef to <4 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r191 = sitofp <4 x i1> undef to <4 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r192 = uitofp <4 x i8> undef to <4 x float> @@ -727,16 +727,16 @@ define i32 @casts() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r197h = sitofp <4 x i32> undef to <4 x half> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r198h = uitofp <4 x i64> undef to <4 x half> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r199h = sitofp <4 x i64> undef to <4 x half> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %r203 = sitofp <4 x i8> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %r204 = uitofp <4 x i16> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %r205 = sitofp <4 x i16> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %r206 = uitofp <4 x i32> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %r207 = sitofp <4 x i32> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r208 = uitofp <4 x i64> undef to <4 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %r209 = sitofp <4 x i64> undef to <4 x double> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r210 = uitofp <8 x i1> undef to <8 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r211 = sitofp <8 x i1> undef to <8 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r212 = uitofp <8 x i8> undef to <8 x float> @@ -757,16 +757,16 @@ define i32 @casts() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r217h = sitofp <8 x i32> undef to <8 x half> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r218h = uitofp <8 x i64> undef to <8 x half> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r219h = sitofp <8 x i64> undef to <8 x half> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %r223 = sitofp <8 x i8> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %r224 = uitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %r225 = sitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %r226 = uitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 394 for instruction: %r227 = sitofp <8 x i16> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %r228 = uitofp <8 x i64> undef to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %r229 = sitofp <8 x i64> undef to <8 x double> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r230 = uitofp <16 x i1> undef to <16 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r231 = sitofp <16 x i1> undef to <16 x float> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %r232 = uitofp <16 x i8> undef to <16 x float> @@ -787,16 +787,16 @@ define i32 @casts() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r237h = sitofp <16 x i32> undef to <16 x half> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %r238h = uitofp <16 x i64> undef to <16 x half> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %r239h = sitofp <16 x i64> undef to <16 x half> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2090 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1578 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1578 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1578 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1578 for instruction: %r243 = sitofp <16 x i8> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1576 for instruction: %r244 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1576 for instruction: %r245 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1576 for instruction: %r246 = uitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1576 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1536 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1536 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'casts' diff --git a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll index 4406e27acabad..afa616260b3e9 100644 --- a/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll +++ b/llvm/test/Analysis/CostModel/ARM/cast_ldst.ll @@ -81,14 +81,14 @@ define i32 @load_extends() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi16 = load i16, i16* undef, align 2 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi32 = load i32, i32* undef, align 4 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i8 = load <2 x i8>, <2 x i8>* undef, align 2 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i8 = load <4 x i8>, <4 x i8>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i8 = load <8 x i8>, <8 x i8>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv16i8 = load <16 x i8>, <16 x i8>* undef, align 16 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i16 = load <2 x i16>, <2 x i16>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i16 = load <4 x i16>, <4 x i16>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8i16 = load <8 x i16>, <8 x i16>* undef, align 16 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %loadv2i32 = load <2 x i32>, <2 x i32>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4i32 = load <4 x i32>, <4 x i32>* undef, align 16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = sext i8 %loadi8 to i16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = zext i8 %loadi8 to i16 @@ -774,9 +774,9 @@ define i32 @store_trunc() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1632, i16* undef, align 2 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %i1664, i16* undef, align 2 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %i3264, i32* undef, align 4 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2816, <2 x i8>* undef, align 2 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2832, <2 x i8>* undef, align 2 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> %v2864, <2 x i8>* undef, align 2 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4816, <4 x i8>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4832, <4 x i8>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4864, <4 x i8>* undef, align 4 @@ -786,13 +786,13 @@ define i32 @store_trunc() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16816, <16 x i8>* undef, align 16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16832, <16 x i8>* undef, align 16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> %v16864, <16 x i8>* undef, align 16 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> %v21632, <2 x i16>* undef, align 4 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> %v21664, <2 x i16>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v41632, <4 x i16>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i16> %v41664, <4 x i16>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81632, <8 x i16>* undef, align 16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1273,11 +1273,11 @@ define i32 @load_fpextends() { ; CHECK-MVE-RECIP-LABEL: 'load_fpextends' ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf32 = load float, float* undef, align 4 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %loadv2f16 = load <2 x half>, <2 x half>* undef, align 4 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv4f16 = load <4 x half>, <4 x half>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv8f16 = load <8 x half>, <8 x half>* undef, align 16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv16f16 = load <16 x half>, <16 x half>* undef, align 32 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %loadv2f32 = load <2 x float>, <2 x float>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %loadv4f32 = load <4 x float>, <4 x float>* undef, align 16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %loadv8f32 = load <8 x float>, <8 x float>* undef, align 32 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r1 = fpext half %loadf16 to float @@ -1567,13 +1567,13 @@ define i32 @load_fptrunc() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1632, half* undef, align 2 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store half %i1664, half* undef, align 2 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float %i3264, float* undef, align 4 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x half> %v21632, <2 x half>* undef, align 4 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x half> %v21664, <2 x half>* undef, align 4 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x half> %v41632, <4 x half>* undef, align 8 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <4 x half> %v41664, <4 x half>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81632, <8 x half>* undef, align 16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; diff --git a/llvm/test/Analysis/CostModel/ARM/cmps.ll b/llvm/test/Analysis/CostModel/ARM/cmps.ll index 75f2bd58b2128..bda8763ef18da 100644 --- a/llvm/test/Analysis/CostModel/ARM/cmps.ll +++ b/llvm/test/Analysis/CostModel/ARM/cmps.ll @@ -22,9 +22,9 @@ define i32 @cmps() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a9 = fcmp ogt double undef, undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a10 = fcmp olt <8 x half> undef, undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a11 = fcmp oge <4 x float> undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %a12 = fcmp oge <2 x double> undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a12 = fcmp oge <2 x double> undef, undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %p = icmp eq i32* undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %q = icmp eq <4 x i32*> undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %q = icmp eq <4 x i32*> undef, undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'cmps' @@ -183,8 +183,8 @@ define void @minmax() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s4 = select <4 x i1> %c4, <4 x i32> undef, <4 x i32> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %c5 = icmp slt i32* undef, undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s5 = select i1 %c5, i32* undef, i32* undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %c6 = icmp slt <4 x i32*> undef, undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %c6 = icmp slt <4 x i32*> undef, undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %s6 = select <4 x i1> %c6, <4 x i32*> undef, <4 x i32*> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'minmax' diff --git a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll index de5ec2034a35d..0d9fae25cbb3e 100644 --- a/llvm/test/Analysis/CostModel/ARM/divrem.ll +++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll @@ -423,22 +423,22 @@ define void @vi8() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi8' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i8> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi8' @@ -538,22 +538,22 @@ define void @vi16() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi16' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i16> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi16' @@ -653,22 +653,22 @@ define void @vi32() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi32' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i32> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi32' @@ -768,22 +768,22 @@ define void @vi64() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi64' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t1 = sdiv <2 x i64> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t2 = udiv <2 x i64> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f1 = sdiv <4 x i64> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f2 = udiv <4 x i64> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i64> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %s1 = sdiv <16 x i64> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %s2 = udiv <16 x i64> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i64> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi64' @@ -873,12 +873,12 @@ define void @vf16() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf16' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = frem <2 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = frem <4 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = frem <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf16' @@ -928,12 +928,12 @@ define void @vf32() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf32' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = frem <2 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = frem <4 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = frem <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf32' @@ -983,12 +983,12 @@ define void @vf64() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf64' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = frem <2 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = frem <4 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = frem <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf64' @@ -1048,22 +1048,22 @@ define void @vi8_2() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi8_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i8> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi8_2' @@ -1163,22 +1163,22 @@ define void @vi16_2() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi16_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i16> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi16_2' @@ -1278,22 +1278,22 @@ define void @vi32_2() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi32_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t1 = sdiv <2 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t2 = udiv <2 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e1 = sdiv <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e2 = udiv <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s1 = sdiv <16 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s2 = udiv <16 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i32> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi32_2' @@ -1393,22 +1393,22 @@ define void @vi64_2() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vi64_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t1 = sdiv <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t2 = udiv <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f1 = sdiv <4 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %f2 = udiv <4 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %s1 = sdiv <16 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %s2 = udiv <16 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s3 = srem <16 x i64> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %s4 = urem <16 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vi64_2' @@ -1498,12 +1498,12 @@ define void @vf16_2() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf16_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = frem <2 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = frem <4 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x half> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = frem <8 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf16_2' @@ -1553,12 +1553,12 @@ define void @vf32_2() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf32_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = frem <2 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = frem <4 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x float> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = frem <8 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf32_2' @@ -1608,12 +1608,12 @@ define void @vf64_2() { ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVE-LABEL: 'vf64_2' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = fdiv <2 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = frem <2 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %3 = fdiv <4 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %4 = frem <4 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %5 = fdiv <8 x double> undef, -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %6 = frem <8 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-V8M-MAIN-LABEL: 'vf64_2' diff --git a/llvm/test/Analysis/CostModel/ARM/fparith.ll b/llvm/test/Analysis/CostModel/ARM/fparith.ll index cb3d66edfa20c..3403ab25e490c 100644 --- a/llvm/test/Analysis/CostModel/ARM/fparith.ll +++ b/llvm/test/Analysis/CostModel/ARM/fparith.ll @@ -61,15 +61,15 @@ define void @f64() { define void @vf32() { ; CHECK-MVE-LABEL: 'vf32' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = fadd <2 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = fsub <2 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = fmul <2 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c4 = fadd <4 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d4 = fsub <4 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e4 = fmul <4 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = fadd <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = fsub <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = fmul <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c4 = fadd <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d4 = fsub <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e4 = fmul <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c8 = fadd <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d8 = fsub <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e8 = fmul <8 x float> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVEFP-LABEL: 'vf32' @@ -98,15 +98,15 @@ define void @vf32() { define void @vf16() { ; CHECK-MVE-LABEL: 'vf16' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = fadd <2 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = fsub <2 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = fmul <2 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c4 = fadd <4 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d4 = fsub <4 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e4 = fmul <4 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = fadd <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = fsub <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = fmul <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c4 = fadd <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d4 = fsub <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e4 = fmul <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c8 = fadd <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d8 = fsub <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e8 = fmul <8 x half> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVEFP-LABEL: 'vf16' @@ -135,27 +135,27 @@ define void @vf16() { define void @vf64() { ; CHECK-MVE-LABEL: 'vf64' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = fadd <2 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = fsub <2 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = fmul <2 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c4 = fadd <4 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d4 = fsub <4 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e4 = fmul <4 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = fadd <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = fsub <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = fmul <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c4 = fadd <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d4 = fsub <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e4 = fmul <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c8 = fadd <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d8 = fsub <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e8 = fmul <8 x double> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVEFP-LABEL: 'vf64' -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %c2 = fadd <2 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %d2 = fsub <2 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %e2 = fmul <2 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %c4 = fadd <4 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %d4 = fsub <4 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %e4 = fmul <4 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %c8 = fadd <8 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %d8 = fsub <8 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %e8 = fmul <8 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %c2 = fadd <2 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %d2 = fsub <2 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e2 = fmul <2 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %c4 = fadd <4 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %d4 = fsub <4 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %e4 = fmul <4 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %c8 = fadd <8 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %d8 = fsub <8 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %e8 = fmul <8 x double> undef, undef ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %c2 = fadd <2 x double> undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll index f64393cbe88c1..b8a6a9569c9a6 100644 --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -95,7 +95,7 @@ define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float define void @log2(float %a, <16 x float> %va) { ; THRU-LABEL: 'log2' ; THRU-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) -; THRU-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; THRU-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'log2' @@ -105,12 +105,12 @@ define void @log2(float %a, <16 x float> %va) { ; ; SIZE-LABEL: 'log2' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.log2.f32(float %a) -; SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'log2' ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call float @llvm.log2.f32(float %a) @@ -121,7 +121,7 @@ define void @log2(float %a, <16 x float> %va) { define void @constrained_fadd(float %a, <16 x float> %va) { ; THRU-LABEL: 'constrained_fadd' ; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; THRU-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; THRU-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'constrained_fadd' @@ -131,12 +131,12 @@ define void @constrained_fadd(float %a, <16 x float> %va) { ; ; SIZE-LABEL: 'constrained_fadd' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'constrained_fadd' ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore") ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore") @@ -147,7 +147,7 @@ define void @constrained_fadd(float %a, <16 x float> %va) { define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) { ; THRU-LABEL: 'fmaximum' ; THRU-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; THRU-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; THRU-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmaximum' @@ -157,12 +157,12 @@ define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) { ; ; SIZE-LABEL: 'fmaximum' ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; SIZE-NEXT: Cost Model: Found an estimated cost of 784 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'fmaximum' ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call float @llvm.maximum.f32(float %a, float %b) @@ -225,7 +225,7 @@ define void @ctlz(i32 %a, <16 x i32> %va) { define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) { ; THRU-LABEL: 'fshl' ; THRU-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; THRU-NEXT: Cost Model: Found an estimated cost of 832 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; THRU-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fshl' @@ -235,12 +235,12 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x ; ; SIZE-LABEL: 'fshl' ; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; SIZE-NEXT: Cost Model: Found an estimated cost of 805 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; SIZE-NEXT: Cost Model: Found an estimated cost of 229 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'fshl' ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 826 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) @@ -250,7 +250,7 @@ define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) { ; THRU-LABEL: 'maskedgather' -; THRU-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; THRU-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedgather' @@ -258,11 +258,11 @@ define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) { ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'maskedgather' -; SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'maskedgather' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %va, i32 1, <16 x i1> %vb, <16 x float> %vc) @@ -271,7 +271,7 @@ define void @maskedgather(<16 x float*> %va, <16 x i1> %vb, <16 x float> %vc) { define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) { ; THRU-LABEL: 'maskedscatter' -; THRU-NEXT: Cost Model: Found an estimated cost of 576 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; THRU-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'maskedscatter' @@ -279,11 +279,11 @@ define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) { ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'maskedscatter' -; SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'maskedscatter' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %va, <16 x float*> %vb, i32 1, <16 x i1> %vc) @@ -292,7 +292,7 @@ define void @maskedscatter(<16 x float> %va, <16 x float*> %vb, <16 x i1> %vc) { define void @reduce_fmax(<16 x float> %va) { ; THRU-LABEL: 'reduce_fmax' -; THRU-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; THRU-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fmax' @@ -300,11 +300,11 @@ define void @reduce_fmax(<16 x float> %va) { ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmax' -; SIZE-NEXT: Cost Model: Found an estimated cost of 685 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; SIZE-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'reduce_fmax' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 694 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) diff --git a/llvm/test/Analysis/CostModel/ARM/load_store.ll b/llvm/test/Analysis/CostModel/ARM/load_store.ll index 2ca4acda0fc2d..52f6e3cccac64 100644 --- a/llvm/test/Analysis/CostModel/ARM/load_store.ll +++ b/llvm/test/Analysis/CostModel/ARM/load_store.ll @@ -69,16 +69,16 @@ define void @stores() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 undef, i128* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store float undef, float* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store double undef, double* undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i8> undef, <2 x i8>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i16> undef, <2 x i16>* undef, align 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: store <2 x i32> undef, <2 x i32>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> undef, <8 x i16>* undef, align 2 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i8> undef, <16 x i8>* undef, align 1 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> undef, <4 x float>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x double> undef, <4 x double>* undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: store <2 x float> undef, <2 x float>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: store <2 x float> undef, <2 x float>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x double> undef, <2 x double>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i64> undef, <2 x i64>* undef, align 1 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> undef, <4 x i32>* undef, align 1 @@ -256,16 +256,16 @@ define void @loads() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %5 = load i128, i128* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = load float, float* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = load double, double* undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %8 = load <2 x i8>, <2 x i8>* undef, align 1 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %9 = load <2 x i16>, <2 x i16>* undef, align 2 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %10 = load <2 x i32>, <2 x i32>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = load <2 x i64>, <2 x i64>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = load <4 x i32>, <4 x i32>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = load <8 x i16>, <8 x i16>* undef, align 2 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = load <16 x i8>, <16 x i8>* undef, align 1 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = load <4 x float>, <4 x float>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = load <4 x double>, <4 x double>* undef, align 4 -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %17 = load <2 x float>, <2 x float>* undef, align 4 +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %17 = load <2 x float>, <2 x float>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = load <2 x double>, <2 x double>* undef, align 4 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = load <2 x i64>, <2 x i64>* undef, align 1 ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = load <4 x i32>, <4 x i32>* undef, align 1 diff --git a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll index a69531c0379a4..ace2d1bee070c 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-abs.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-abs.ll @@ -31,22 +31,22 @@ declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1) define i32 @abs(i32 %arg) { ; MVE-RECIP-LABEL: 'abs' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) @@ -56,22 +56,22 @@ define i32 @abs(i32 %arg) { ; ; MVE-SIZE-LABEL: 'abs' ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false) -; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) +; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false) ; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false) diff --git a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll index ca53d85e556a3..c2af5e1cacb53 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-cmp.ll @@ -4,21 +4,21 @@ define void @icmp() { ; CHECK-LABEL: 'icmp' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i8 = icmp slt <2 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v2i8 = icmp slt <2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = icmp slt <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = icmp slt <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = icmp slt <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1028 for instruction: %v32i8 = icmp slt <32 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i16 = icmp slt <2 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %v32i8 = icmp slt <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v2i16 = icmp slt <2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = icmp slt <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = icmp slt <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16i16 = icmp slt <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i32 = icmp slt <2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16i16 = icmp slt <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v2i32 = icmp slt <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = icmp slt <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v8i32 = icmp slt <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16i32 = icmp slt <16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i64 = icmp slt <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4i64 = icmp slt <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8i32 = icmp slt <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16i32 = icmp slt <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v2i64 = icmp slt <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v4i64 = icmp slt <4 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v8i64 = icmp slt <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -47,31 +47,31 @@ define void @icmp() { define void @fcmp() { ; CHECK-MVE-LABEL: 'fcmp' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-MVEFP-LABEL: 'fcmp' ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fcmp olt <2 x half> undef, undef ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fcmp olt <4 x half> undef, undef ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = fcmp olt <8 x half> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f16 = fcmp olt <16 x half> undef, undef ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fcmp olt <2 x float> undef, undef ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fcmp olt <4 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef -; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8f32 = fcmp olt <8 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f32 = fcmp olt <16 x float> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fcmp olt <2 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fcmp olt <4 x double> undef, undef +; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f64 = fcmp olt <8 x double> undef, undef ; CHECK-MVEFP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2f16 = fcmp olt <2 x half> undef, undef diff --git a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll index fe75e5087ec4a..c368991faea65 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-gather-scatter-cost.ll @@ -3,32 +3,32 @@ define i32 @masked_gather() { ; CHECK-LABEL: 'masked_gather' -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 4, <4 x i1> undef, <4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 4, <2 x i1> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 4, <16 x i1> undef, <16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 4, <8 x i1> undef, <8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 4, <4 x i1> undef, <4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 4, <2 x i1> undef, <2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 4, <16 x i1> undef, <16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 4, <8 x i1> undef, <8 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 4, <4 x i1> undef, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 4, <2 x i1> undef, <2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0f16(<16 x half*> undef, i32 2, <16 x i1> undef, <16 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> undef, i32 2, <8 x i1> undef, <8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*> undef, i32 2, <4 x i1> undef, <4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0f16(<2 x half*> undef, i32 2, <2 x i1> undef, <2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 4, <4 x i1> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 4, <2 x i1> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 4, <16 x i1> undef, <16 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 4, <8 x i1> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 4, <2 x i1> undef, <2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16F16 = call <16 x half> @llvm.masked.gather.v16f16.v16p0f16(<16 x half*> undef, i32 2, <16 x i1> undef, <16 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> undef, i32 2, <8 x i1> undef, <8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = call <4 x half> @llvm.masked.gather.v4f16.v4p0f16(<4 x half*> undef, i32 2, <4 x i1> undef, <4 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = call <2 x half> @llvm.masked.gather.v2f16.v2p0f16(<2 x half*> undef, i32 2, <2 x i1> undef, <2 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 4, <4 x i1> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 4, <2 x i1> undef, <2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 4, <16 x i1> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 4, <8 x i1> undef, <8 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 4, <4 x i1> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 4, <2 x i1> undef, <2 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 2, <16 x i1> undef, <16 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 2, <8 x i1> undef, <8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 4, <2 x i1> undef, <2 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 2, <16 x i1> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 2, <8 x i1> undef, <8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 2, <4 x i1> undef, <4 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> undef, i32 2, <2 x i1> undef, <2 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2112 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I16 = call <2 x i16> @llvm.masked.gather.v2i16.v2p0i16(<2 x i16*> undef, i32 2, <2 x i1> undef, <2 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4I8 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> undef, i32 1, <4 x i1> undef, <4 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*> undef, i32 1, <2 x i1> undef, <2 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I32p = call <4 x i32*> @llvm.masked.gather.v4p0i32.v4p0p0i32(<4 x i32**> undef, i32 4, <4 x i1> undef, <4 x i32*> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I8 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0i8(<2 x i8*> undef, i32 1, <2 x i1> undef, <2 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32p = call <4 x i32*> @llvm.masked.gather.v4p0i32.v4p0p0i32(<4 x i32**> undef, i32 4, <4 x i1> undef, <4 x i32*> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 4, <4 x i1> undef, <4 x double> undef) @@ -70,31 +70,31 @@ define i32 @masked_gather() { define i32 @masked_scatter() { ; CHECK-LABEL: 'masked_scatter' -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 4, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 4, <2 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 576 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 4, <16 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 4, <8 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 4, <4 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 4, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 4, <16 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 4, <8 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 4, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 4, <2 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 544 for instruction: call void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half> undef, <16 x half*> undef, i32 2, <16 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> undef, <8 x half*> undef, i32 2, <8 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> undef, <4 x half*> undef, i32 2, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half> undef, <2 x half*> undef, i32 2, <2 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 4, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 4, <2 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 576 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 4, <16 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 4, <8 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 4, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16f16.v16p0f16(<16 x half> undef, <16 x half*> undef, i32 2, <16 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> undef, <8 x half*> undef, i32 2, <8 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> undef, <4 x half*> undef, i32 2, <4 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half> undef, <2 x half*> undef, i32 2, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 4, <4 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 4, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 4, <16 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 4, <8 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 4, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 4, <2 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 544 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 2, <16 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 2, <8 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 4, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 2, <16 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 2, <8 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 2, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> undef, <2 x i16*> undef, i32 2, <2 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2112 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> undef, <2 x i16*> undef, i32 2, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> undef, <4 x i8*> undef, i32 1, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> undef, <2 x i8*> undef, i32 1, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> undef, <2 x i8*> undef, i32 1, <2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 4, <4 x i1> undef) @@ -205,8 +205,8 @@ define void @gep_v4f32(float* %base, i16* %base16, i8* %base8, <4 x i32> %ind32, ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res3 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep3, i32 4, <4 x i1> %mask, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %res3, <4 x float*> %gep3, i32 4, <4 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gepu = getelementptr float, float* %base, <4 x i32> %ind32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %resu = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gepu, i32 1, <4 x i1> %mask, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %resu, <4 x float*> %gepu, i32 1, <4 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %resu = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gepu, i32 1, <4 x i1> %mask, <4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %resu, <4 x float*> %gepu, i32 1, <4 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, i8* %base8, <4 x i32> %indzext ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <4 x i8*> %gepos to <4 x float*> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %resos = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %geposb, i32 4, <4 x i1> %mask, <4 x float> undef) @@ -336,26 +336,26 @@ define void @gep_v4i8(i8* %base, <4 x i8> %ind8, <4 x i1> %mask) { define void @gep_v8i16(i16* %base, i8* %base8, i32* %base32, <8 x i32> %ind32, <8 x i16> %ind16, <8 x i8> %ind8, <8 x i1> %mask) { ; CHECK-LABEL: 'gep_v8i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i16, i16* %base, <8 x i32> %ind32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %res1 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep1, i32 2, <8 x i1> %mask, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %res1, <8 x i16*> %gep1, i32 2, <8 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %res1 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep1, i32 2, <8 x i1> %mask, <8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %res1, <8 x i16*> %gep1, i32 2, <8 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indzext = zext <8 x i16> %ind16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i16, i16* %base, <8 x i32> %indzext ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res2 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep2, i32 2, <8 x i1> %mask, <8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %res2, <8 x i16*> %gep2, i32 2, <8 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indsext = sext <8 x i16> %ind16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i16, i16* %base, <8 x i32> %indsext -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %res3 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep3, i32 2, <8 x i1> %mask, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %res3, <8 x i16*> %gep3, i32 2, <8 x i1> %mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %resu = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep2, i32 1, <8 x i1> %mask, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %resu, <8 x i16*> %gep2, i32 1, <8 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %res3 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep3, i32 2, <8 x i1> %mask, <8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %res3, <8 x i16*> %gep3, i32 2, <8 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %resu = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gep2, i32 1, <8 x i1> %mask, <8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %resu, <8 x i16*> %gep2, i32 1, <8 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, i8* %base8, <8 x i32> %indzext ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <8 x i8*> %gepos to <8 x i16*> ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %resos = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %geposb, i32 2, <8 x i1> %mask, <8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %resos, <8 x i16*> %geposb, i32 2, <8 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i32, i32* %base32, <8 x i32> %indzext ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x i32*> %gepbs to <8 x i16*> -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %resbs = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gepbsb, i32 2, <8 x i1> %mask, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %resbs, <8 x i16*> %gepbsb, i32 2, <8 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %resbs = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> %gepbsb, i32 2, <8 x i1> %mask, <8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %resbs, <8 x i16*> %gepbsb, i32 2, <8 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indzext4 = zext <8 x i16> %ind16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep4 = getelementptr i16, i16* %base, <8 x i32> %indzext4 ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %indtrunc = trunc <8 x i32> %ind32 to <8 x i16> @@ -417,26 +417,26 @@ define void @gep_v8i16(i16* %base, i8* %base8, i32* %base32, <8 x i32> %ind32, < define void @gep_v8f16(half* %base, i8* %base8, i32* %base32, <8 x i32> %ind32, <8 x i16> %ind16, <8 x i1> %mask) { ; CHECK-LABEL: 'gep_v8f16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep1 = getelementptr half, half* %base, <8 x i32> %ind32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %res1 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep1, i32 2, <8 x i1> %mask, <8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %res1, <8 x half*> %gep1, i32 2, <8 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res1 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep1, i32 2, <8 x i1> %mask, <8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %res1, <8 x half*> %gep1, i32 2, <8 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indzext = zext <8 x i16> %ind16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep2 = getelementptr half, half* %base, <8 x i32> %indzext ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res2 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep2, i32 2, <8 x i1> %mask, <8 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %res2, <8 x half*> %gep2, i32 2, <8 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %indsext = sext <8 x i16> %ind16 to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %gep3 = getelementptr half, half* %base, <8 x i32> %indsext -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %res3 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep3, i32 2, <8 x i1> %mask, <8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %res3, <8 x half*> %gep3, i32 2, <8 x i1> %mask) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %resu = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep2, i32 1, <8 x i1> %mask, <8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resu, <8 x half*> %gep2, i32 1, <8 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res3 = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep3, i32 2, <8 x i1> %mask, <8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %res3, <8 x half*> %gep3, i32 2, <8 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %resu = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gep2, i32 1, <8 x i1> %mask, <8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resu, <8 x half*> %gep2, i32 1, <8 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepos = getelementptr i8, i8* %base8, <8 x i32> %indzext ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %geposb = bitcast <8 x i8*> %gepos to <8 x half*> ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %resos = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %geposb, i32 2, <8 x i1> %mask, <8 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resos, <8 x half*> %geposb, i32 2, <8 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i32, i32* %base32, <8 x i32> %indzext ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <8 x i32*> %gepbs to <8 x half*> -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resbs, <8 x half*> %gepbsb, i32 2, <8 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %resbs = call <8 x half> @llvm.masked.gather.v8f16.v8p0f16(<8 x half*> %gepbsb, i32 2, <8 x i1> %mask, <8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %resbs, <8 x half*> %gepbsb, i32 2, <8 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; no offset ext @@ -509,20 +509,20 @@ define void @gep_v8i8(i8* %base, <8 x i8> %ind8, <8 x i1> %mask) { define void @gep_v16i8(i8* %base, i16* %base16, <16 x i8> %ind8, <16 x i32> %ind32, <16 x i1> %mask) { ; CHECK-LABEL: 'gep_v16i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep1 = getelementptr i8, i8* %base, <16 x i32> %ind32 -; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %res1 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gep1, i32 1, <16 x i1> %mask, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %res1, <16 x i8*> %gep1, i32 2, <16 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %res1 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gep1, i32 1, <16 x i1> %mask, <16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %res1, <16 x i8*> %gep1, i32 2, <16 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %indzext = zext <16 x i8> %ind8 to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep2 = getelementptr i8, i8* %base, <16 x i32> %indzext ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %res2 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gep2, i32 2, <16 x i1> %mask, <16 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %res2, <16 x i8*> %gep2, i32 2, <16 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %indsext = sext <16 x i8> %ind8 to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep3 = getelementptr i8, i8* %base, <16 x i32> %indsext -; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %res3 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gep3, i32 2, <16 x i1> %mask, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %res3, <16 x i8*> %gep3, i32 2, <16 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %res3 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gep3, i32 2, <16 x i1> %mask, <16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %res3, <16 x i8*> %gep3, i32 2, <16 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i16, i16* %base16, <16 x i32> %indzext ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbsb = bitcast <16 x i16*> %gepbs to <16 x i8*> -; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbsb, i32 2, <16 x i1> %mask, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %resbs, <16 x i8*> %gepbsb, i32 2, <16 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbsb, i32 2, <16 x i1> %mask, <16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %resbs, <16 x i8*> %gepbsb, i32 2, <16 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %indzext4 = zext <16 x i8> %ind8 to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep4 = getelementptr i8, i8* %base, <16 x i32> %indzext ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %indtrunc = trunc <16 x i32> %ind32 to <16 x i8> @@ -564,8 +564,8 @@ define void @gep_v16i8(i8* %base, i16* %base16, <16 x i8> %ind8, <16 x i32> %ind define void @gep_v16i8p(<16 x i8*> %base, i32 %off, <16 x i1> %mask) { ; CHECK-LABEL: 'gep_v16i8p' ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gepbs = getelementptr i8, <16 x i8*> %base, i32 %off -; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 528 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %resbs, <16 x i8*> %gepbs, i32 2, <16 x i1> %mask) +; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %resbs = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %gepbs, i32 2, <16 x i1> %mask, <16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %resbs, <16 x i8*> %gepbs, i32 2, <16 x i1> %mask) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %gepbs = getelementptr i8, <16 x i8*> %base, i32 %off diff --git a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll index ede3bdda30ca5..b075ba7549046 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-minmax.ll @@ -33,22 +33,22 @@ declare <64 x i8> @llvm.smin.v64i8(<64 x i8>, <64 x i8>) define i32 @smin(i32 %arg) { ; MVE-RECIP-LABEL: 'smin' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -140,22 +140,22 @@ declare <64 x i8> @llvm.smax.v64i8(<64 x i8>, <64 x i8>) define i32 @smax(i32 %arg) { ; MVE-RECIP-LABEL: 'smax' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -248,22 +248,22 @@ declare <64 x i8> @llvm.umin.v64i8(<64 x i8>, <64 x i8>) define i32 @umin(i32 %arg) { ; MVE-RECIP-LABEL: 'umin' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -355,22 +355,22 @@ declare <64 x i8> @llvm.umax.v64i8(<64 x i8>, <64 x i8>) define i32 @sub(i32 %arg) { ; MVE-RECIP-LABEL: 'sub' ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) -; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) +; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) ; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) @@ -455,45 +455,45 @@ declare <32 x half> @llvm.minnum.v32f16(<32 x half>, <32 x half>) define float @minnum(float %arg) { ; MVEI-RECIP-LABEL: 'minnum' ; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef) ; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef) ; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F16 = call half @llvm.minnum.f16(half undef, half undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 1344 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef) ; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef ; ; MVEI-SIZE-LABEL: 'minnum' ; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef) ; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef) ; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.minnum.f16(half undef, half undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.minnum.v2f16(<2 x half> undef, <2 x half> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.minnum.v4f16(<4 x half> undef, <4 x half> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.minnum.v8f16(<8 x half> undef, <8 x half> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.minnum.v16f16(<16 x half> undef, <16 x half> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.minnum.v32f16(<32 x half> undef, <32 x half> undef) ; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef ; ; MVEF-RECIP-LABEL: 'minnum' ; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef) -; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef) -; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef) -; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef) +; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef) +; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef) +; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef) ; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef) ; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef) ; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef) @@ -509,9 +509,9 @@ define float @minnum(float %arg) { ; ; MVEF-SIZE-LABEL: 'minnum' ; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.minnum.f64(double undef, double undef) -; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef) -; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef) -; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef) +; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef) +; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef) +; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef) ; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef) ; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef) ; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef) @@ -567,45 +567,45 @@ declare <32 x half> @llvm.maxnum.v32f16(<32 x half>, <32 x half>) define float @maxnum(float %arg) { ; MVEI-RECIP-LABEL: 'maxnum' ; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) ; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) ; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F16 = call half @llvm.maxnum.f16(half undef, half undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef) -; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 1344 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef) +; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 352 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef) ; MVEI-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float undef ; ; MVEI-SIZE-LABEL: 'maxnum' ; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) ; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) ; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.maxnum.f16(half undef, half undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef) -; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> undef, <2 x half> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> undef, <4 x half> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.maxnum.v8f16(<8 x half> undef, <8 x half> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F16 = call <16 x half> @llvm.maxnum.v16f16(<16 x half> undef, <16 x half> undef) +; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32F16 = call <32 x half> @llvm.maxnum.v32f16(<32 x half> undef, <32 x half> undef) ; MVEI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret float undef ; ; MVEF-RECIP-LABEL: 'maxnum' ; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef) -; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) -; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) -; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) +; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) +; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) +; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) ; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef) ; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) ; MVEF-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) @@ -621,9 +621,9 @@ define float @maxnum(float %arg) { ; ; MVEF-SIZE-LABEL: 'maxnum' ; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.maxnum.f64(double undef, double undef) -; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) -; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) -; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) +; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) +; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) +; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) ; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef) ; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) ; MVEF-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll b/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll index 8b59de716fcc7..c670d6f6baeea 100644 --- a/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll @@ -3,8 +3,8 @@ define void @add_i8() { ; CHECK-LABEL: 'add_i8' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) @@ -26,13 +26,13 @@ define void @add_i8() { define void @add_i16() { ; CHECK-LABEL: 'add_i16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16> @@ -45,8 +45,8 @@ define void @add_i16() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) @@ -98,13 +98,13 @@ define void @add_i16() { define void @add_i32() { ; CHECK-LABEL: 'add_i32' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32> @@ -118,13 +118,13 @@ define void @add_i32() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32> @@ -137,8 +137,8 @@ define void @add_i32() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) @@ -219,71 +219,71 @@ define void @add_i32() { define void @add_i64() { ; CHECK-LABEL: 'add_i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a0za = zext <1 x i8> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0za = zext <1 x i8> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a5za = zext <1 x i16> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5za = zext <1 x i16> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10za = zext <1 x i32> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10za = zext <1 x i32> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0za = zext <1 x i8> undef to <1 x i64> @@ -392,9 +392,9 @@ define void @add_i64() { define void @mla_i8() { ; CHECK-LABEL: 'mla_i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0m = mul <1 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a1m = mul <2 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a0m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a1m = mul <2 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2m = mul <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3m = mul <8 x i8> undef, undef @@ -426,19 +426,19 @@ define void @mla_i16() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i16> %a0za, %a0zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i16> %a0sa, %a0sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a0sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a1zm = mul <2 x i16> %a1za, %a1zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1zm = mul <2 x i16> %a1za, %a1zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a1sm = mul <2 x i16> %a1sa, %a1sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1sm = mul <2 x i16> %a1sa, %a1sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zb = zext <4 x i8> undef to <4 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i16> %a2za, %a2zb @@ -464,9 +464,9 @@ define void @mla_i16() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4sm = mul <16 x i16> %a4sa, %a4sb ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5m = mul <1 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a6m = mul <2 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a6m = mul <2 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7m = mul <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8m = mul <8 x i16> undef, undef @@ -548,19 +548,19 @@ define void @mla_i32() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0za = zext <1 x i8> undef to <1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zb = zext <1 x i8> undef to <1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0zm = mul <1 x i32> %a0za, %a0zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sa = sext <1 x i8> undef to <1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sb = sext <1 x i8> undef to <1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a0sm = mul <1 x i32> %a0sa, %a0sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a0sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1za = zext <2 x i8> undef to <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a1zb = zext <2 x i8> undef to <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a1zm = mul <2 x i32> %a1za, %a1zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1zm = mul <2 x i32> %a1za, %a1zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sb = sext <2 x i8> undef to <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a1sm = mul <2 x i32> %a1sa, %a1sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a1sm = mul <2 x i32> %a1sa, %a1sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2zb = zext <4 x i8> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i32> %a2za, %a2zb @@ -588,19 +588,19 @@ define void @mla_i32() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zb = zext <1 x i16> undef to <1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zm = mul <1 x i32> %a5za, %a5zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sb = sext <1 x i16> undef to <1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sm = mul <1 x i32> %a5sa, %a5sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5s = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6za = zext <2 x i16> undef to <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a6zb = zext <2 x i16> undef to <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a6zm = mul <2 x i32> %a6za, %a6zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a6zm = mul <2 x i32> %a6za, %a6zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6z = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sb = sext <2 x i16> undef to <2 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %a6sm = mul <2 x i32> %a6sa, %a6sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %a6sm = mul <2 x i32> %a6sa, %a6sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zb = zext <4 x i16> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zm = mul <4 x i32> %a7za, %a7zb @@ -626,9 +626,9 @@ define void @mla_i32() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9sm = mul <16 x i32> %a9sa, %a9sb ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a10m = mul <1 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a11m = mul <2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a11m = mul <2 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12m = mul <4 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13m = mul <8 x i32> undef, undef @@ -757,136 +757,136 @@ define void @mla_i32() { define void @mla_i64() { ; CHECK-LABEL: 'mla_i64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a0za = zext <1 x i8> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a0zb = zext <1 x i8> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0za = zext <1 x i8> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0zb = zext <1 x i8> undef to <1 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0zm = mul <1 x i64> %a0za, %a0zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a0sb = sext <1 x i8> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sa = sext <1 x i8> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a0sb = sext <1 x i8> undef to <1 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a0sm = mul <1 x i64> %a0sa, %a0sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a0s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a0sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1za = zext <2 x i8> undef to <2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1zb = zext <2 x i8> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a1zm = mul <2 x i64> %a1za, %a1zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a1zm = mul <2 x i64> %a1za, %a1zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sa = sext <2 x i8> undef to <2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a1sb = sext <2 x i8> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a1sm = mul <2 x i64> %a1sa, %a1sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a1sm = mul <2 x i64> %a1sa, %a1sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a1s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2za = zext <4 x i8> undef to <4 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a2zb = zext <4 x i8> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a2zm = mul <4 x i64> %a2za, %a2zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a2zm = mul <4 x i64> %a2za, %a2zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sa = sext <4 x i8> undef to <4 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a2sb = sext <4 x i8> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a2sm = mul <4 x i64> %a2sa, %a2sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a2sm = mul <4 x i64> %a2sa, %a2sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a2s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a2sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3za = zext <8 x i8> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a3zb = zext <8 x i8> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a3zm = mul <8 x i64> %a3za, %a3zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sa = sext <8 x i8> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a3sb = sext <8 x i8> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a3sm = mul <8 x i64> %a3sa, %a3sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a3s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a3sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4za = zext <16 x i8> undef to <16 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 298 for instruction: %a4zb = zext <16 x i8> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a4zm = mul <16 x i64> %a4za, %a4zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a4zm = mul <16 x i64> %a4za, %a4zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sa = sext <16 x i8> undef to <16 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1322 for instruction: %a4sb = sext <16 x i8> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a4sm = mul <16 x i64> %a4sa, %a4sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a5za = zext <1 x i16> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a5zb = zext <1 x i16> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a4sm = mul <16 x i64> %a4sa, %a4sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a4s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a4sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5za = zext <1 x i16> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a5zb = zext <1 x i16> undef to <1 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5zm = mul <1 x i64> %a5za, %a5zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5sb = sext <1 x i16> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sa = sext <1 x i16> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %a5sb = sext <1 x i16> undef to <1 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a5sm = mul <1 x i64> %a5sa, %a5sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a5s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a5sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6za = zext <2 x i16> undef to <2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6zb = zext <2 x i16> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a6zm = mul <2 x i64> %a6za, %a6zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a6zm = mul <2 x i64> %a6za, %a6zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sa = sext <2 x i16> undef to <2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a6sb = sext <2 x i16> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a6sm = mul <2 x i64> %a6sa, %a6sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a6sm = mul <2 x i64> %a6sa, %a6sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a6s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a6sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7za = zext <4 x i16> undef to <4 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a7zb = zext <4 x i16> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a7zm = mul <4 x i64> %a7za, %a7zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a7zm = mul <4 x i64> %a7za, %a7zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sa = sext <4 x i16> undef to <4 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %a7sb = sext <4 x i16> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a7sm = mul <4 x i64> %a7sa, %a7sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a7sm = mul <4 x i64> %a7sa, %a7sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a7s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a7sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8za = zext <8 x i16> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a8zb = zext <8 x i16> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a8zm = mul <8 x i64> %a8za, %a8zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sa = sext <8 x i16> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %a8sb = sext <8 x i16> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a8sm = mul <8 x i64> %a8sa, %a8sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a8s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a8sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9za = zext <16 x i16> undef to <16 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %a9zb = zext <16 x i16> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a9zm = mul <16 x i64> %a9za, %a9zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a9zm = mul <16 x i64> %a9za, %a9zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sa = sext <16 x i16> undef to <16 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %a9sb = sext <16 x i16> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a9sm = mul <16 x i64> %a9sa, %a9sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10za = zext <1 x i32> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10zb = zext <1 x i32> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a9sm = mul <16 x i64> %a9sa, %a9sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a9s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a9sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10za = zext <1 x i32> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10zb = zext <1 x i32> undef to <1 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10zm = mul <1 x i64> %a10za, %a10zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm) -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a10sb = sext <1 x i32> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10z = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sa = sext <1 x i32> undef to <1 x i64> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %a10sb = sext <1 x i32> undef to <1 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a10sm = mul <1 x i64> %a10sa, %a10sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a10s = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a10sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11za = zext <2 x i32> undef to <2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a11zb = zext <2 x i32> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11zm = mul <2 x i64> %a11za, %a11zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a11zm = mul <2 x i64> %a11za, %a11zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sa = sext <2 x i32> undef to <2 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sb = sext <2 x i32> undef to <2 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %a11sm = mul <2 x i64> %a11sa, %a11sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %a11sm = mul <2 x i64> %a11sa, %a11sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a11s = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a11sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12za = zext <4 x i32> undef to <4 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a12zb = zext <4 x i32> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a12zm = mul <4 x i64> %a12za, %a12zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a12zm = mul <4 x i64> %a12za, %a12zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sa = sext <4 x i32> undef to <4 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12sb = sext <4 x i32> undef to <4 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %a12sm = mul <4 x i64> %a12sa, %a12sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %a12sm = mul <4 x i64> %a12sa, %a12sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a12s = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a12sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13za = zext <8 x i32> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %a13zb = zext <8 x i32> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a13zm = mul <8 x i64> %a13za, %a13zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sa = sext <8 x i32> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %a13sb = sext <8 x i32> undef to <8 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a13sm = mul <8 x i64> %a13sa, %a13sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a13s = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a13sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14za = zext <16 x i32> undef to <16 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a14zb = zext <16 x i32> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a14zm = mul <16 x i64> %a14za, %a14zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a14zm = mul <16 x i64> %a14za, %a14zb +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sa = sext <16 x i32> undef to <16 x i64> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %a14sb = sext <16 x i32> undef to <16 x i64> -; CHECK-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %a14sm = mul <16 x i64> %a14sa, %a14sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %a14sm = mul <16 x i64> %a14sa, %a14sb +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a14s = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a14sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a15m = mul <1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a16m = mul <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m) -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %a17m = mul <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a15 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a15m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %a16m = mul <2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %a16 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a16m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a17m = mul <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %a17 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a17m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %a18m = mul <8 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m) -; CHECK-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %a19m = mul <16 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 730 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %a18 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a18m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %a19m = mul <16 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 808 for instruction: %a19 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a19m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0za = zext <1 x i8> undef to <1 x i64> diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll index 868ebdc24756f..5a72f3be05cc1 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smax.ll @@ -21,11 +21,11 @@ define i32 @reduce_i64(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V2 = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V4 = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 568 for instruction: %V8 = call i64 @llvm.vector.reduce.smax.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1128 for instruction: %V16 = call i64 @llvm.vector.reduce.smax.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> undef) @@ -54,11 +54,11 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V8 = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V16 = call i32 @llvm.vector.reduce.smax.v16i32(<16 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 712 for instruction: %V32 = call i32 @llvm.vector.reduce.smax.v32i32(<32 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> undef) @@ -89,12 +89,12 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %V16 = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 860 for instruction: %V32 = call i16 @llvm.vector.reduce.smax.v32i16(<32 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1516 for instruction: %V64 = call i16 @llvm.vector.reduce.smax.v64i16(<64 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i16 @llvm.vector.reduce.smax.v2i16(<2 x i16> undef) @@ -128,13 +128,13 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %V16 = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1304 for instruction: %V32 = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1952 for instruction: %V64 = call i8 @llvm.vector.reduce.smax.v64i8(<64 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 3248 for instruction: %V128 = call i8 @llvm.vector.reduce.smax.v128i8(<128 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i8 @llvm.vector.reduce.smax.v2i8(<2 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll index 61ec7a4637d72..cd4da54354362 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-smin.ll @@ -21,11 +21,11 @@ define i32 @reduce_i64(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V2 = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V4 = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 568 for instruction: %V8 = call i64 @llvm.vector.reduce.smin.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1128 for instruction: %V16 = call i64 @llvm.vector.reduce.smin.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> undef) @@ -54,11 +54,11 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V8 = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V16 = call i32 @llvm.vector.reduce.smin.v16i32(<16 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 712 for instruction: %V32 = call i32 @llvm.vector.reduce.smin.v32i32(<32 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> undef) @@ -89,12 +89,12 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %V16 = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 860 for instruction: %V32 = call i16 @llvm.vector.reduce.smin.v32i16(<32 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1516 for instruction: %V64 = call i16 @llvm.vector.reduce.smin.v64i16(<64 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i16 @llvm.vector.reduce.smin.v2i16(<2 x i16> undef) @@ -128,13 +128,13 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %V16 = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1304 for instruction: %V32 = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1952 for instruction: %V64 = call i8 @llvm.vector.reduce.smin.v64i8(<64 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 3248 for instruction: %V128 = call i8 @llvm.vector.reduce.smin.v128i8(<128 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i8 @llvm.vector.reduce.smin.v2i8(<2 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll index 96f24f3c92e93..0447e3826ca55 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-umax.ll @@ -21,11 +21,11 @@ define i32 @reduce_i64(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 568 for instruction: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1128 for instruction: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef) @@ -54,11 +54,11 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 712 for instruction: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef) @@ -89,12 +89,12 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 860 for instruction: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1516 for instruction: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef) @@ -128,13 +128,13 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1304 for instruction: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1952 for instruction: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 3248 for instruction: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll index d77ed48b4a5b5..62510ae70f2bc 100644 --- a/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll +++ b/llvm/test/Analysis/CostModel/ARM/reduce-umin.ll @@ -21,11 +21,11 @@ define i32 @reduce_i64(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i64' -; MVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 970 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 568 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1128 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef) @@ -54,11 +54,11 @@ define i32 @reduce_i32(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i32' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 696 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2504 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 712 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef) @@ -89,12 +89,12 @@ define i32 @reduce_i16(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i16' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 2976 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 10160 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 860 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1516 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef) @@ -128,13 +128,13 @@ define i32 @reduce_i8(i32 %arg) { ; NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; MVE-LABEL: 'reduce_i8' -; MVE-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 788 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 4128 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 5668 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 12844 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) -; MVE-NEXT: Cost Model: Found an estimated cost of 41532 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1044 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1304 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 1952 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef) +; MVE-NEXT: Cost Model: Found an estimated cost of 3248 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef) ; MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/select.ll b/llvm/test/Analysis/CostModel/ARM/select.ll index 67a558003923c..173904d9e60b7 100644 --- a/llvm/test/Analysis/CostModel/ARM/select.ll +++ b/llvm/test/Analysis/CostModel/ARM/select.ll @@ -18,28 +18,28 @@ define void @selects() { ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4 = select i1 undef, i64 undef, i64 undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5 = select i1 undef, float undef, float undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v6 = select i1 undef, double undef, double undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8 = select <4 x i1> undef, <4 x i8> undef, <4 x i8> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = select <8 x i1> undef, <8 x i8> undef, <8 x i8> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v10 = select <16 x i1> undef, <16 x i8> undef, <16 x i8> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v18 = select <4 x i1> undef, <4 x float> undef, <4 x float> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v19 = select <2 x i1> undef, <2 x double> undef, <2 x double> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v20 = select <1 x i1> undef, <1 x i32> undef, <1 x i32> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v21 = select <3 x i1> undef, <3 x float> undef, <3 x float> undef -; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v22 = select <5 x i1> undef, <5 x double> undef, <5 x double> undef ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-RECIP-LABEL: 'selects' diff --git a/llvm/test/Analysis/CostModel/ARM/shuffle.ll b/llvm/test/Analysis/CostModel/ARM/shuffle.ll index dde0a02bf4731..65f7ea0393a1f 100644 --- a/llvm/test/Analysis/CostModel/ARM/shuffle.ll +++ b/llvm/test/Analysis/CostModel/ARM/shuffle.ll @@ -4,19 +4,19 @@ define void @broadcast() { ; CHECK-MVE-LABEL: 'broadcast' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> zeroinitializer @@ -24,8 +24,8 @@ define void @broadcast() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> zeroinitializer +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> zeroinitializer ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-LABEL: 'broadcast' @@ -88,28 +88,28 @@ define void @broadcast() { ;; Reverse shuffles should be lowered to vrev and possibly a vext (for quadwords, on neon) define void @reverse() { ; CHECK-MVE-LABEL: 'reverse' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-LABEL: 'reverse' @@ -233,28 +233,28 @@ define void @concat() { define void @select() { ; CHECK-MVE-LABEL: 'select' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = shufflevector <2 x half> undef, <2 x half> undef, <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = shufflevector <2 x double> undef, <2 x double> undef, <2 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-LABEL: 'select' @@ -321,16 +321,16 @@ define void @vrev2() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = shufflevector <4 x double> undef, <4 x double> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-LABEL: 'vrev2' @@ -381,11 +381,11 @@ define void @vrev4() { ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-LABEL: 'vrev4' diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll index d2961326ba712..6d7555ae59be6 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-icmpcost.ll @@ -20,18 +20,18 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK: LV: Scalar loop costs: 5. ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016 -; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: %1 = load i16, i16* %arrayidx, align 2 +; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %1 = load i16, i16* %arrayidx, align 2 ; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction: %conv = sext i16 %1 to i32 -; CHECK: LV: Found an estimated cost of 12 for VF 2 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1 +; CHECK: LV: Found an estimated cost of 20 for VF 2 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %cmp2, label %if.then, label %for.inc -; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: %conv6 = add i16 %1, %0 +; CHECK: LV: Found an estimated cost of 26 for VF 2 For instruction: %conv6 = add i16 %1, %0 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016 ; CHECK: LV: Found an estimated cost of 16 for VF 2 For instruction: store i16 %conv6, i16* %arrayidx7, align 2 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br label %for.inc ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %inc = add nuw nsw i32 %i.016, 1 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %exitcond.not = icmp eq i32 %inc, %n ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body -; CHECK: LV: Vector loop of width 2 costs: 29. +; CHECK: LV: Vector loop of width 2 costs: 43. ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %i.016 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016 ; CHECK: LV: Found an estimated cost of 2 for VF 4 For instruction: %1 = load i16, i16* %arrayidx, align 2 @@ -50,7 +50,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx = getelementptr inbounds i16, i16* %s, i32 %i.016 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %1 = load i16, i16* %arrayidx, align 2 ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv = sext i16 %1 to i32 -; CHECK: LV: Found an estimated cost of 68 for VF 8 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1 +; CHECK: LV: Found an estimated cost of 36 for VF 8 For instruction: %cmp2 = icmp sgt i32 %conv, %conv1 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %cmp2, label %if.then, label %for.inc ; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %conv6 = add i16 %1, %0 ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: %arrayidx7 = getelementptr inbounds i16, i16* %d, i32 %i.016 @@ -59,7 +59,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %inc = add nuw nsw i32 %i.016, 1 ; CHECK: LV: Found an estimated cost of 1 for VF 8 For instruction: %exitcond.not = icmp eq i32 %inc, %n ; CHECK: LV: Found an estimated cost of 0 for VF 8 For instruction: br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body -; CHECK: LV: Vector loop of width 8 costs: 9. +; CHECK: LV: Vector loop of width 8 costs: 5. ; CHECK: LV: Selecting VF: 4. define void @expensive_icmp(i16* noalias nocapture %d, i16* nocapture readonly %s, i32 %n, i16 zeroext %m) #0 { entry: @@ -120,22 +120,22 @@ for.inc: ; preds = %for.body, %if.then ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %pSrcB.addr.09 = phi i8* [ %incdec.ptr2, %while.body ], [ %pSrcB, %while.body.preheader ] ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr = getelementptr inbounds i8, i8* %pSrcA.addr.011, i32 1 -; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: %0 = load i8, i8* %pSrcA.addr.011, align 1 +; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %0 = load i8, i8* %pSrcA.addr.011, align 1 ; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction: %conv1 = sext i8 %0 to i32 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr2 = getelementptr inbounds i8, i8* %pSrcB.addr.09, i32 1 -; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: %1 = load i8, i8* %pSrcB.addr.09, align 1 +; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %1 = load i8, i8* %pSrcB.addr.09, align 1 ; CHECK: LV: Found an estimated cost of 4 for VF 2 For instruction: %conv3 = sext i8 %1 to i32 -; CHECK: LV: Found an estimated cost of 14 for VF 2 For instruction: %mul = mul nsw i32 %conv3, %conv1 -; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: %shr = ashr i32 %mul, 7 +; CHECK: LV: Found an estimated cost of 26 for VF 2 For instruction: %mul = mul nsw i32 %conv3, %conv1 +; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: %shr = ashr i32 %mul, 7 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %2 = icmp slt i32 %shr, 127 -; CHECK: LV: Found an estimated cost of 24 for VF 2 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127 +; CHECK: LV: Found an estimated cost of 40 for VF 2 For instruction: %spec.select.i = select i1 %2, i32 %shr, i32 127 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %conv4 = trunc i32 %spec.select.i to i8 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %incdec.ptr5 = getelementptr inbounds i8, i8* %pDst.addr.010, i32 1 -; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: store i8 %conv4, i8* %pDst.addr.010, align 1 +; CHECK: LV: Found an estimated cost of 18 for VF 2 For instruction: store i8 %conv4, i8* %pDst.addr.010, align 1 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %dec = add i32 %blkCnt.012, -1 ; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %cmp.not = icmp eq i32 %dec, 0 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: br i1 %cmp.not, label %while.end.loopexit, label %while.body -; CHECK: LV: Vector loop of width 2 costs: 44. +; CHECK: LV: Vector loop of width 2 costs: 74. ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %blkCnt.012 = phi i32 [ %dec, %while.body ], [ %blockSize, %while.body.preheader ] ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %pSrcA.addr.011 = phi i8* [ %incdec.ptr, %while.body ], [ %pSrcA, %while.body.preheader ] ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %pDst.addr.010 = phi i8* [ %incdec.ptr5, %while.body ], [ %pDst, %while.body.preheader ] @@ -238,8 +238,8 @@ while.end: ; preds = %while.end.loopexit, } ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp1 = fcmp -; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: %cmp1 = fcmp -; CHECK: LV: Found an estimated cost of 36 for VF 4 For instruction: %cmp1 = fcmp +; CHECK: LV: Found an estimated cost of 12 for VF 2 For instruction: %cmp1 = fcmp +; CHECK: LV: Found an estimated cost of 24 for VF 4 For instruction: %cmp1 = fcmp define void @floatcmp(float* nocapture readonly %pSrc, i32* nocapture %pDst, i32 %blockSize) #0 { entry: %cmp.not7 = icmp eq i32 %blockSize, 0 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll index f511a81c29157..006fc47e5c32a 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll @@ -15,10 +15,10 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i8_factor_2" -; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 +; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1 +; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1 ; VF_4-LABEL: Checking a loop in "i8_factor_2" ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 @@ -56,10 +56,10 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i16_factor_2" -; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 +; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2 +; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2 ; VF_4-LABEL: Checking a loop in "i16_factor_2" ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 @@ -97,10 +97,10 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i32_factor_2" -; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 +; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 +; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 ; VF_4-LABEL: Checking a loop in "i32_factor_2" ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 @@ -138,25 +138,25 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i64_factor_2" -; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 +; VF_2: Found an estimated cost of 44 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_4-LABEL: Checking a loop in "i64_factor_2" -; VF_4: Found an estimated cost of 80 for VF 4 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 +; VF_4: Found an estimated cost of 88 for VF 4 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 48 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_8-LABEL: Checking a loop in "i64_factor_2" -; VF_8: Found an estimated cost of 288 for VF 8 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 +; VF_8: Found an estimated cost of 176 for VF 8 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 160 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_16-LABEL: Checking a loop in "i64_factor_2" -; VF_16: Found an estimated cost of 1088 for VF 16 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 +; VF_16: Found an estimated cost of 352 for VF 16 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 576 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.2, %i64.2* %data, i64 %i, i32 0 @@ -179,15 +179,15 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "f16_factor_2" -; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load half, half* %tmp0, align 2 +; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load half, half* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load half, half* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2 +; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_4-LABEL: Checking a loop in "f16_factor_2" -; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2 +; VF_4: Found an estimated cost of 18 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2 -; VF_4-NEXT: Found an estimated cost of 40 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2 +; VF_4-NEXT: Found an estimated cost of 16 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_8-LABEL: Checking a loop in "f16_factor_2" ; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2 @@ -220,10 +220,10 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "f32_factor_2" -; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load float, float* %tmp0, align 4 +; VF_2: Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load float, float* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load float, float* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4 +; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4 ; VF_4-LABEL: Checking a loop in "f32_factor_2" ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load float, float* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load float, float* %tmp1, align 4 @@ -261,25 +261,25 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "f64_factor_2" -; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp2 = load double, double* %tmp0, align 8 +; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load double, double* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load double, double* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8 -; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8 +; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_4-LABEL: Checking a loop in "f64_factor_2" -; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp2 = load double, double* %tmp0, align 8 +; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp2 = load double, double* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load double, double* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8 -; VF_4-NEXT: Found an estimated cost of 40 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8 +; VF_4-NEXT: Found an estimated cost of 16 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_8-LABEL: Checking a loop in "f64_factor_2" -; VF_8: Found an estimated cost of 272 for VF 8 For instruction: %tmp2 = load double, double* %tmp0, align 8 +; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp2 = load double, double* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load double, double* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8 -; VF_8-NEXT: Found an estimated cost of 144 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8 +; VF_8-NEXT: Found an estimated cost of 32 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_16-LABEL: Checking a loop in "f64_factor_2" -; VF_16: Found an estimated cost of 1056 for VF 16 For instruction: %tmp2 = load double, double* %tmp0, align 8 +; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp2 = load double, double* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load double, double* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8 -; VF_16-NEXT: Found an estimated cost of 544 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8 +; VF_16-NEXT: Found an estimated cost of 64 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f64.2, %f64.2* %data, i64 %i, i32 0 @@ -306,33 +306,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i8_factor_3" -; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 +; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i8, i8* %tmp1, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1 -; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store i8 0, i8* %tmp2, align 1 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 0, i8* %tmp2, align 1 ; VF_4-LABEL: Checking a loop in "i8_factor_3" -; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 +; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i8, i8* %tmp1, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1 -; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store i8 0, i8* %tmp2, align 1 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 0, i8* %tmp2, align 1 ; VF_8-LABEL: Checking a loop in "i8_factor_3" -; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 +; VF_8: Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1 -; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1 ; VF_16-LABEL: Checking a loop in "i8_factor_3" -; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 +; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp1, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1 -; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i8.3, %i8.3* %data, i64 %i, i32 0 @@ -358,33 +358,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i16_factor_3" -; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 +; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i16, i16* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store i16 0, i16* %tmp2, align 2 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 0, i16* %tmp2, align 2 ; VF_4-LABEL: Checking a loop in "i16_factor_3" -; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 +; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2 ; VF_8-LABEL: Checking a loop in "i16_factor_3" -; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 +; VF_8: Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2 ; VF_16-LABEL: Checking a loop in "i16_factor_3" -; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 +; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i16.3, %i16.3* %data, i64 %i, i32 0 @@ -410,12 +410,12 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i32_factor_3" -; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 +; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, i32* %tmp2, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4 ; VF_4-LABEL: Checking a loop in "i32_factor_3" ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 @@ -424,19 +424,19 @@ entry: ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4 ; VF_8-LABEL: Checking a loop in "i32_factor_3" -; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 +; VF_8: Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp2, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4 ; VF_16-LABEL: Checking a loop in "i32_factor_3" -; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 +; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp2, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i32.3, %i32.3* %data, i64 %i, i32 0 @@ -462,33 +462,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i64_factor_3" -; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 +; VF_2: Found an estimated cost of 66 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i64, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i64 0, i64* %tmp2, align 8 +; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store i64 0, i64* %tmp2, align 8 ; VF_4-LABEL: Checking a loop in "i64_factor_3" -; VF_4: Found an estimated cost of 120 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 +; VF_4: Found an estimated cost of 132 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i64, i64* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 72 for VF 4 For instruction: store i64 0, i64* %tmp2, align 8 +; VF_4-NEXT: Found an estimated cost of 36 for VF 4 For instruction: store i64 0, i64* %tmp2, align 8 ; VF_8-LABEL: Checking a loop in "i64_factor_3" -; VF_8: Found an estimated cost of 432 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 +; VF_8: Found an estimated cost of 264 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i64, i64* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 240 for VF 8 For instruction: store i64 0, i64* %tmp2, align 8 +; VF_8-NEXT: Found an estimated cost of 72 for VF 8 For instruction: store i64 0, i64* %tmp2, align 8 ; VF_16-LABEL: Checking a loop in "i64_factor_3" -; VF_16: Found an estimated cost of 1632 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 +; VF_16: Found an estimated cost of 528 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i64, i64* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 864 for VF 16 For instruction: store i64 0, i64* %tmp2, align 8 +; VF_16-NEXT: Found an estimated cost of 144 for VF 16 For instruction: store i64 0, i64* %tmp2, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.3, %i64.3* %data, i64 %i, i32 0 @@ -514,33 +514,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "f16_factor_3" -; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load half, half* %tmp0, align 2 +; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load half, half* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load half, half* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, half* %tmp2, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2 -; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store half 0xH0000, half* %tmp2, align 2 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store half 0xH0000, half* %tmp2, align 2 ; VF_4-LABEL: Checking a loop in "f16_factor_3" -; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load half, half* %tmp0, align 2 +; VF_4: Found an estimated cost of 28 for VF 4 For instruction: %tmp3 = load half, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, half* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, half* %tmp2, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2 -; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2 ; VF_8-LABEL: Checking a loop in "f16_factor_3" -; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load half, half* %tmp0, align 2 +; VF_8: Found an estimated cost of 56 for VF 8 For instruction: %tmp3 = load half, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load half, half* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, half* %tmp2, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2 -; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2 ; VF_16-LABEL: Checking a loop in "f16_factor_3" -; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load half, half* %tmp0, align 2 +; VF_16: Found an estimated cost of 112 for VF 16 For instruction: %tmp3 = load half, half* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load half, half* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, half* %tmp2, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2 -; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store half 0xH0000, half* %tmp2, align 2 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store half 0xH0000, half* %tmp2, align 2 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f16.3, %f16.3* %data, i64 %i, i32 0 @@ -566,12 +566,12 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "f32_factor_3" -; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load float, float* %tmp0, align 4 +; VF_2: Found an estimated cost of 16 for VF 2 For instruction: %tmp3 = load float, float* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load float, float* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, float* %tmp2, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4 -; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4 ; VF_4-LABEL: Checking a loop in "f32_factor_3" ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load float, float* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load float, float* %tmp1, align 4 @@ -580,19 +580,19 @@ entry: ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4 ; VF_8-LABEL: Checking a loop in "f32_factor_3" -; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load float, float* %tmp0, align 4 +; VF_8: Found an estimated cost of 64 for VF 8 For instruction: %tmp3 = load float, float* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, float* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp2, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4 -; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4 ; VF_16-LABEL: Checking a loop in "f32_factor_3" -; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load float, float* %tmp0, align 4 +; VF_16: Found an estimated cost of 128 for VF 16 For instruction: %tmp3 = load float, float* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load float, float* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, float* %tmp2, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4 -; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store float 0.000000e+00, float* %tmp2, align 4 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store float 0.000000e+00, float* %tmp2, align 4 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f32.3, %f32.3* %data, i64 %i, i32 0 @@ -618,33 +618,33 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "f64_factor_3" -; VF_2: Found an estimated cost of 30 for VF 2 For instruction: %tmp3 = load double, double* %tmp0, align 8 +; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load double, double* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load double, double* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, double* %tmp2, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8 -; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store double 0.000000e+00, double* %tmp2, align 8 +; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store double 0.000000e+00, double* %tmp2, align 8 ; VF_4-LABEL: Checking a loop in "f64_factor_3" -; VF_4: Found an estimated cost of 108 for VF 4 For instruction: %tmp3 = load double, double* %tmp0, align 8 +; VF_4: Found an estimated cost of 36 for VF 4 For instruction: %tmp3 = load double, double* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load double, double* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, double* %tmp2, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8 -; VF_4-NEXT: Found an estimated cost of 60 for VF 4 For instruction: store double 0.000000e+00, double* %tmp2, align 8 +; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store double 0.000000e+00, double* %tmp2, align 8 ; VF_8-LABEL: Checking a loop in "f64_factor_3" -; VF_8: Found an estimated cost of 408 for VF 8 For instruction: %tmp3 = load double, double* %tmp0, align 8 +; VF_8: Found an estimated cost of 72 for VF 8 For instruction: %tmp3 = load double, double* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load double, double* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, double* %tmp2, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8 -; VF_8-NEXT: Found an estimated cost of 216 for VF 8 For instruction: store double 0.000000e+00, double* %tmp2, align 8 +; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store double 0.000000e+00, double* %tmp2, align 8 ; VF_16-LABEL: Checking a loop in "f64_factor_3" -; VF_16: Found an estimated cost of 1584 for VF 16 For instruction: %tmp3 = load double, double* %tmp0, align 8 +; VF_16: Found an estimated cost of 144 for VF 16 For instruction: %tmp3 = load double, double* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load double, double* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, double* %tmp2, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8 -; VF_16-NEXT: Found an estimated cost of 816 for VF 16 For instruction: store double 0.000000e+00, double* %tmp2, align 8 +; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store double 0.000000e+00, double* %tmp2, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f64.3, %f64.3* %data, i64 %i, i32 0 @@ -673,41 +673,41 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i8_factor_4" -; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 +; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, i8* %tmp1, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i8, i8* %tmp2, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i8, i8* %tmp3, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp2, align 1 -; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i8 0, i8* %tmp3, align 1 +; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store i8 0, i8* %tmp3, align 1 ; VF_4-LABEL: Checking a loop in "i8_factor_4" -; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 +; VF_4: Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, i8* %tmp1, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i8, i8* %tmp2, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i8, i8* %tmp3, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp2, align 1 -; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store i8 0, i8* %tmp3, align 1 +; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store i8 0, i8* %tmp3, align 1 ; VF_8-LABEL: Checking a loop in "i8_factor_4" -; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 +; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i8, i8* %tmp2, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i8, i8* %tmp3, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1 -; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i8 0, i8* %tmp3, align 1 +; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store i8 0, i8* %tmp3, align 1 ; VF_16-LABEL: Checking a loop in "i8_factor_4" -; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 +; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp1, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i8, i8* %tmp2, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i8, i8* %tmp3, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp1, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp2, align 1 -; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i8 0, i8* %tmp3, align 1 +; VF_16-NEXT: Found an estimated cost of 128 for VF 16 For instruction: store i8 0, i8* %tmp3, align 1 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i8.4, %i8.4* %data, i64 %i, i32 0 @@ -736,41 +736,41 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i16_factor_4" -; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 +; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, i16* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i16, i16* %tmp2, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i16, i16* %tmp3, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp2, align 2 -; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i16 0, i16* %tmp3, align 2 +; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store i16 0, i16* %tmp3, align 2 ; VF_4-LABEL: Checking a loop in "i16_factor_4" -; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 +; VF_4: Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i16, i16* %tmp2, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i16, i16* %tmp3, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2 -; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store i16 0, i16* %tmp3, align 2 +; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store i16 0, i16* %tmp3, align 2 ; VF_8-LABEL: Checking a loop in "i16_factor_4" -; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 +; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i16, i16* %tmp2, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i16, i16* %tmp3, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2 -; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i16 0, i16* %tmp3, align 2 +; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store i16 0, i16* %tmp3, align 2 ; VF_16-LABEL: Checking a loop in "i16_factor_4" -; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 +; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i16, i16* %tmp2, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i16, i16* %tmp3, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp2, align 2 -; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i16 0, i16* %tmp3, align 2 +; VF_16-NEXT: Found an estimated cost of 128 for VF 16 For instruction: store i16 0, i16* %tmp3, align 2 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i16.4, %i16.4* %data, i64 %i, i32 0 @@ -799,14 +799,14 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i32_factor_4" -; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp0, align 4 +; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, i32* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i32, i32* %tmp2, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i32, i32* %tmp3, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4 -; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i32 0, i32* %tmp3, align 4 +; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store i32 0, i32* %tmp3, align 4 ; VF_4-LABEL: Checking a loop in "i32_factor_4" ; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp1, align 4 @@ -817,23 +817,23 @@ entry: ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4 ; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store i32 0, i32* %tmp3, align 4 ; VF_8-LABEL: Checking a loop in "i32_factor_4" -; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp0, align 4 +; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i32, i32* %tmp2, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i32, i32* %tmp3, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4 -; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store i32 0, i32* %tmp3, align 4 +; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store i32 0, i32* %tmp3, align 4 ; VF_16-LABEL: Checking a loop in "i32_factor_4" -; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp0, align 4 +; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i32, i32* %tmp2, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i32, i32* %tmp3, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp2, align 4 -; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store i32 0, i32* %tmp3, align 4 +; VF_16-NEXT: Found an estimated cost of 128 for VF 16 For instruction: store i32 0, i32* %tmp3, align 4 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i32.4, %i32.4* %data, i64 %i, i32 0 @@ -862,41 +862,41 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "i64_factor_4" -; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 +; VF_2: Found an estimated cost of 88 for VF 2 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i64, i64* %tmp2, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i64, i64* %tmp3, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 32 for VF 2 For instruction: store i64 0, i64* %tmp3, align 8 +; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i64 0, i64* %tmp3, align 8 ; VF_4-LABEL: Checking a loop in "i64_factor_4" -; VF_4: Found an estimated cost of 160 for VF 4 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 +; VF_4: Found an estimated cost of 176 for VF 4 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, i64* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i64, i64* %tmp2, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i64, i64* %tmp3, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 96 for VF 4 For instruction: store i64 0, i64* %tmp3, align 8 +; VF_4-NEXT: Found an estimated cost of 48 for VF 4 For instruction: store i64 0, i64* %tmp3, align 8 ; VF_8-LABEL: Checking a loop in "i64_factor_4" -; VF_8: Found an estimated cost of 576 for VF 8 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 +; VF_8: Found an estimated cost of 352 for VF 8 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, i64* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i64, i64* %tmp2, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i64, i64* %tmp3, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 320 for VF 8 For instruction: store i64 0, i64* %tmp3, align 8 +; VF_8-NEXT: Found an estimated cost of 96 for VF 8 For instruction: store i64 0, i64* %tmp3, align 8 ; VF_16-LABEL: Checking a loop in "i64_factor_4" -; VF_16: Found an estimated cost of 2176 for VF 16 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 +; VF_16: Found an estimated cost of 704 for VF 16 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, i64* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i64, i64* %tmp2, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i64, i64* %tmp3, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 1152 for VF 16 For instruction: store i64 0, i64* %tmp3, align 8 +; VF_16-NEXT: Found an estimated cost of 192 for VF 16 For instruction: store i64 0, i64* %tmp3, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %i64.4, %i64.4* %data, i64 %i, i32 0 @@ -925,41 +925,41 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "f16_factor_4" -; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load half, half* %tmp0, align 2 +; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp4 = load half, half* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, half* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load half, half* %tmp2, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load half, half* %tmp3, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp2, align 2 -; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store half 0xH0000, half* %tmp3, align 2 +; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store half 0xH0000, half* %tmp3, align 2 ; VF_4-LABEL: Checking a loop in "f16_factor_4" -; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load half, half* %tmp0, align 2 +; VF_4: Found an estimated cost of 36 for VF 4 For instruction: %tmp4 = load half, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, half* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load half, half* %tmp2, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load half, half* %tmp3, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2 -; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store half 0xH0000, half* %tmp3, align 2 +; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, half* %tmp3, align 2 ; VF_8-LABEL: Checking a loop in "f16_factor_4" -; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load half, half* %tmp0, align 2 +; VF_8: Found an estimated cost of 72 for VF 8 For instruction: %tmp4 = load half, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, half* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load half, half* %tmp2, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load half, half* %tmp3, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2 -; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store half 0xH0000, half* %tmp3, align 2 +; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, half* %tmp3, align 2 ; VF_16-LABEL: Checking a loop in "f16_factor_4" -; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load half, half* %tmp0, align 2 +; VF_16: Found an estimated cost of 144 for VF 16 For instruction: %tmp4 = load half, half* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, half* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load half, half* %tmp2, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load half, half* %tmp3, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp2, align 2 -; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store half 0xH0000, half* %tmp3, align 2 +; VF_16-NEXT: Found an estimated cost of 128 for VF 16 For instruction: store half 0xH0000, half* %tmp3, align 2 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f16.4, %f16.4* %data, i64 %i, i32 0 @@ -988,14 +988,14 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "f32_factor_4" -; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load float, float* %tmp0, align 4 +; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp4 = load float, float* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, float* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load float, float* %tmp2, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load float, float* %tmp3, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4 -; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store float 0.000000e+00, float* %tmp3, align 4 +; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store float 0.000000e+00, float* %tmp3, align 4 ; VF_4-LABEL: Checking a loop in "f32_factor_4" ; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load float, float* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp1, align 4 @@ -1006,23 +1006,23 @@ entry: ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4 ; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store float 0.000000e+00, float* %tmp3, align 4 ; VF_8-LABEL: Checking a loop in "f32_factor_4" -; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load float, float* %tmp0, align 4 +; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp4 = load float, float* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load float, float* %tmp2, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load float, float* %tmp3, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4 -; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store float 0.000000e+00, float* %tmp3, align 4 +; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store float 0.000000e+00, float* %tmp3, align 4 ; VF_16-LABEL: Checking a loop in "f32_factor_4" -; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load float, float* %tmp0, align 4 +; VF_16: Found an estimated cost of 160 for VF 16 For instruction: %tmp4 = load float, float* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, float* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load float, float* %tmp2, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load float, float* %tmp3, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp2, align 4 -; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store float 0.000000e+00, float* %tmp3, align 4 +; VF_16-NEXT: Found an estimated cost of 128 for VF 16 For instruction: store float 0.000000e+00, float* %tmp3, align 4 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f32.4, %f32.4* %data, i64 %i, i32 0 @@ -1051,41 +1051,41 @@ entry: br label %for.body ; VF_2-LABEL: Checking a loop in "f64_factor_4" -; VF_2: Found an estimated cost of 40 for VF 2 For instruction: %tmp4 = load double, double* %tmp0, align 8 +; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp4 = load double, double* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, double* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load double, double* %tmp2, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load double, double* %tmp3, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp2, align 8 -; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store double 0.000000e+00, double* %tmp3, align 8 +; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store double 0.000000e+00, double* %tmp3, align 8 ; VF_4-LABEL: Checking a loop in "f64_factor_4" -; VF_4: Found an estimated cost of 144 for VF 4 For instruction: %tmp4 = load double, double* %tmp0, align 8 +; VF_4: Found an estimated cost of 48 for VF 4 For instruction: %tmp4 = load double, double* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, double* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load double, double* %tmp2, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load double, double* %tmp3, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp2, align 8 -; VF_4-NEXT: Found an estimated cost of 80 for VF 4 For instruction: store double 0.000000e+00, double* %tmp3, align 8 +; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store double 0.000000e+00, double* %tmp3, align 8 ; VF_8-LABEL: Checking a loop in "f64_factor_4" -; VF_8: Found an estimated cost of 544 for VF 8 For instruction: %tmp4 = load double, double* %tmp0, align 8 +; VF_8: Found an estimated cost of 96 for VF 8 For instruction: %tmp4 = load double, double* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, double* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load double, double* %tmp2, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load double, double* %tmp3, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp2, align 8 -; VF_8-NEXT: Found an estimated cost of 288 for VF 8 For instruction: store double 0.000000e+00, double* %tmp3, align 8 +; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store double 0.000000e+00, double* %tmp3, align 8 ; VF_16-LABEL: Checking a loop in "f64_factor_4" -; VF_16: Found an estimated cost of 2112 for VF 16 For instruction: %tmp4 = load double, double* %tmp0, align 8 +; VF_16: Found an estimated cost of 192 for VF 16 For instruction: %tmp4 = load double, double* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, double* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load double, double* %tmp2, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load double, double* %tmp3, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp2, align 8 -; VF_16-NEXT: Found an estimated cost of 1088 for VF 16 For instruction: store double 0.000000e+00, double* %tmp3, align 8 +; VF_16-NEXT: Found an estimated cost of 128 for VF 16 For instruction: store double 0.000000e+00, double* %tmp3, align 8 for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %tmp0 = getelementptr inbounds %f64.4, %f64.4* %data, i64 %i, i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll index 3356db935eea6..30bc06a7a0b1a 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll @@ -8,7 +8,7 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; CHECK-COST-LABEL: arm_offset_q15 ; CHECK-COST: Found an estimated cost of 10 for VF 1 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) -; CHECK-COST: Found an estimated cost of 28 for VF 2 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) +; CHECK-COST: Found an estimated cost of 36 for VF 2 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) ; CHECK-COST: Found an estimated cost of 8 for VF 4 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) ; CHECK-COST: Found an estimated cost of 2 for VF 8 For instruction: %1 = tail call i16 @llvm.sadd.sat.i16(i16 %0, i16 %offset) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll index 730e26af0bb26..59422cb07d6a6 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll @@ -688,19 +688,37 @@ end: define hidden void @pointer_phi_v4half_add3(half* noalias nocapture readonly %A, half* noalias nocapture %B, half %y) { ; CHECK-LABEL: @pointer_phi_v4half_add3( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr half, half* [[A:%.*]], i32 2976 +; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr half, half* [[B:%.*]], i32 992 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[Y:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x half> [[BROADCAST_SPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[INDEX]], 3 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr half, half* [[A]], i32 [[TMP0]] +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr half, half* [[B]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[NEXT_GEP]] to <24 x half>* +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <24 x half>, <24 x half>* [[TMP1]], align 4 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x half> [[WIDE_VEC]], <24 x half> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <8 x half> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast half* [[NEXT_GEP4]] to <8 x half>* +; CHECK-NEXT: store <8 x half> [[TMP2]], <8 x half>* [[TMP3]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 +; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], [[LOOP22:!llvm.loop !.*]] ; CHECK: for.body: -; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi half* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi half* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = load half, half* [[A_ADDR_09]], align 4 +; CHECK-NEXT: [[A_ADDR_09:%.*]] = phi half* [ [[ADD_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 992, [[VECTOR_BODY]] ] +; CHECK-NEXT: [[B_ADDR_07:%.*]] = phi half* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[IND_END3]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = load half, half* [[A_ADDR_09]], align 4 ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds half, half* [[A_ADDR_09]], i32 3 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast half [[TMP0]], [[Y:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = fadd fast half [[TMP5]], [[Y]] ; CHECK-NEXT: store half [[ADD]], half* [[B_ADDR_07]], align 4 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds half, half* [[B_ADDR_07]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[FOR_BODY]], [[LOOP23:!llvm.loop !.*]] ; CHECK: end: ; CHECK-NEXT: ret void ; @@ -753,7 +771,7 @@ define hidden void @pointer_phi_v4i32_uf2(i32* noalias nocapture readonly %A, i3 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9992 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i32 48 -; CHECK-NEXT: br i1 [[TMP7]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], [[LOOP22:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP7]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], [[LOOP24:!llvm.loop !.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: @@ -767,7 +785,7 @@ define hidden void @pointer_phi_v4i32_uf2(i32* noalias nocapture readonly %A, i3 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[B_ADDR_06]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 10000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], [[LOOP23:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], [[LOOP25:!llvm.loop !.*]] ; entry: @@ -837,7 +855,7 @@ define hidden void @pointer_phi_v4i32_uf4(i32* noalias nocapture readonly %A, i3 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 9984 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i32, i32* [[POINTER_PHI]], i32 96 -; CHECK-NEXT: br i1 [[TMP15]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], [[LOOP24:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP15]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], [[LOOP26:!llvm.loop !.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: @@ -851,7 +869,7 @@ define hidden void @pointer_phi_v4i32_uf4(i32* noalias nocapture readonly %A, i3 ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i32, i32* [[B_ADDR_06]], i32 1 ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 10000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], [[LOOP25:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], [[LOOP27:!llvm.loop !.*]] ; entry: br label %for.body @@ -893,23 +911,23 @@ define hidden void @mult_ptr_iv(i8* noalias nocapture readonly %x, i8* noalias n ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, i8* [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, i8* [[POINTER_PHI5]], <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP0]], i32 1, <4 x i1> , <4 x i8> undef), !alias.scope !26 +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP0]], i32 1, <4 x i1> , <4 x i8> undef), !alias.scope !28 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP0]], i32 2 -; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP2]], i32 1, <4 x i1> , <4 x i8> undef), !alias.scope !26 -; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP3]], i32 1, <4 x i1> , <4 x i8> undef), !alias.scope !26 +; CHECK-NEXT: [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP2]], i32 1, <4 x i1> , <4 x i8> undef), !alias.scope !28 +; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> [[TMP3]], i32 1, <4 x i1> , <4 x i8> undef), !alias.scope !28 ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]] ; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER8]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP4]], <4 x i8*> [[TMP1]], i32 1, <4 x i1> ), !alias.scope !29, !noalias !26 +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP4]], <4 x i8*> [[TMP1]], i32 1, <4 x i1> ), !alias.scope !31, !noalias !28 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, <4 x i8*> [[TMP1]], i32 2 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP5]], <4 x i8*> [[TMP7]], i32 1, <4 x i1> ), !alias.scope !29, !noalias !26 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP6]], <4 x i8*> [[TMP8]], i32 1, <4 x i1> ), !alias.scope !29, !noalias !26 +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP5]], <4 x i8*> [[TMP7]], i32 1, <4 x i1> ), !alias.scope !31, !noalias !28 +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> [[TMP6]], <4 x i8*> [[TMP8]], i32 1, <4 x i1> ), !alias.scope !31, !noalias !28 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, i8* [[POINTER_PHI]], i32 12 ; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, i8* [[POINTER_PHI5]], i32 12 -; CHECK-NEXT: br i1 [[TMP9]], label [[END:%.*]], label [[VECTOR_BODY]], [[LOOP31:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP9]], label [[END:%.*]], label [[VECTOR_BODY]], [[LOOP33:!llvm.loop !.*]] ; CHECK: for.body: ; CHECK-NEXT: [[X_ADDR_050:%.*]] = phi i8* [ [[INCDEC_PTR2:%.*]], [[FOR_BODY]] ], [ [[X]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[Z_ADDR_049:%.*]] = phi i8* [ [[INCDEC_PTR34:%.*]], [[FOR_BODY]] ], [ [[Z]], [[ENTRY]] ] @@ -931,7 +949,7 @@ define hidden void @mult_ptr_iv(i8* noalias nocapture readonly %x, i8* noalias n ; CHECK-NEXT: store i8 [[MUL2]], i8* [[INCDEC_PTR33]], align 1 ; CHECK-NEXT: [[INC]] = add nuw i32 [[I_048]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[FOR_BODY]], [[LOOP32:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[FOR_BODY]], [[LOOP34:!llvm.loop !.*]] ; CHECK: end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll index b67475f19fd18..7e33b331aaa82 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll @@ -465,25 +465,58 @@ for.body: define void @fptrunc_not_allowed(float* noalias nocapture %A, float* noalias nocapture readonly %B, float* noalias nocapture readonly %C, half* noalias nocapture %D) #0 { ; CHECK-LABEL: @fptrunc_not_allowed( ; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP7]], <4 x float>* [[TMP10]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = fptrunc <4 x float> [[TMP7]] to <4 x half> +; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x half> [[TMP11]], +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds half, half* [[D:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds half, half* [[TMP13]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast half* [[TMP14]] to <4 x half>* +; CHECK-NEXT: store <4 x half> [[TMP12]], <4 x half>* [[TMP15]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 431, 428 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 428, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: [[I_017:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[I_017]] -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i32 [[I_017]] -; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[I_017]] +; CHECK-NEXT: [[I_017:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[I_017]] +; CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[C]], i32 [[I_017]] +; CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP18]], [[TMP17]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[I_017]] ; CHECK-NEXT: store float [[ADD]], float* [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = fptrunc float [[ADD]] to half ; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast half [[CONV]], 0xH4000 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds half, half* [[D:%.*]], i32 [[I_017]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds half, half* [[D]], i32 [[I_017]] ; CHECK-NEXT: store half [[FACTOR]], half* [[ARRAYIDX5]], align 2 ; CHECK-NEXT: [[ADD6]] = add nuw nsw i32 [[I_017]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[ADD6]], 431 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP13:!llvm.loop !.*]] ; entry: br label %for.body @@ -591,7 +624,7 @@ define i32 @i32_smin_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -609,7 +642,7 @@ define i32 @i32_smin_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP13:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP15:!llvm.loop !.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -661,7 +694,7 @@ define i32 @i32_smax_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP16:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -679,7 +712,7 @@ define i32 @i32_smax_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP15:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP17:!llvm.loop !.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -731,7 +764,7 @@ define i32 @i32_umin_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP16:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -749,7 +782,7 @@ define i32 @i32_umin_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP17:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP19:!llvm.loop !.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -801,7 +834,7 @@ define i32 @i32_umax_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP20:!llvm.loop !.*]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] @@ -819,7 +852,7 @@ define i32 @i32_umax_reduction(i32* nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[ADD]] = select i1 [[C]], i32 [[R_07]], i32 [[TMP8]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP19:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], [[LOOP21:!llvm.loop !.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]