diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b94cd9de50d17a..00cb4f3784fc5f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1968,8 +1968,9 @@ InstructionCost AArch64TTIImpl::getCFInstrCost(unsigned Opcode, return 0; } -InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { +InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(Type *Val, + unsigned Index, + bool HasRealUse) { assert(Val->isVectorTy() && "This must be a vector type"); if (Index != -1U) { @@ -1988,7 +1989,18 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, } // The element at index zero is already inside the vector. - if (Index == 0) + // - For a physical (HasRealUse==true) insert-element or extract-element + // instruction on integer elements, an explicit move between FPR and GPR + // is needed, so it has non-zero cost. + // - For the remaining cases (virtual instruction or element type is float), + // consider the instruction free. + // + // FIXME: + // If the extract-element and insert-element instructions could be + // simplified away (e.g., could be combined into users by looking at use-def + // context), they have no cost. This is not done in the first place for + // compile-time considerations. 
+ if (Index == 0 && (!HasRealUse || !Val->getScalarType()->isIntegerTy())) return 0; } @@ -1996,6 +2008,16 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, return ST->getVectorInsertExtractBaseCost(); } +InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) { + return getVectorInstrCostHelper(Val, Index, false /* HasRealUse */); +} + +InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I, + Type *Val, unsigned Index) { + return getVectorInstrCostHelper(Val, Index, true /* HasRealUse */); +} + InstructionCost AArch64TTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index bf917b5b9d84b9..473a00d5a9f57a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -59,6 +59,14 @@ class AArch64TTIImpl : public BasicTTIImplBase { bool isWideningInstruction(Type *Ty, unsigned Opcode, ArrayRef Args); + // A helper function called by 'getVectorInstrCost'. + // + // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse' + // indicates whether the vector instruction is available in the input IR or + // just imaginary in vectorizer passes. 
+ InstructionCost getVectorInstrCostHelper(Type *Val, unsigned Index, + bool HasRealUse); + public: explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), @@ -173,9 +181,10 @@ class AArch64TTIImpl : public BasicTTIImplBase { InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, + unsigned Index); InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, diff --git a/llvm/test/Analysis/CostModel/AArch64/kryo-inseltpoison.ll b/llvm/test/Analysis/CostModel/AArch64/kryo-inseltpoison.ll index c9ee6c93e86938..ca1e6655170a1a 100644 --- a/llvm/test/Analysis/CostModel/AArch64/kryo-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/AArch64/kryo-inseltpoison.ll @@ -6,18 +6,16 @@ target triple = "aarch64--linux-gnu" ; CHECK-LABEL: vectorInstrCost define void @vectorInstrCost() { - ; Vector extracts - extracting the first element should have a zero cost; - ; all other elements should have a cost of two. + ; Vector extracts - extracting elements should have a cost of two. ; - ; CHECK: cost of 0 {{.*}} extractelement <2 x i64> undef, i32 0 + ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 0 ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 1 %t1 = extractelement <2 x i64> undef, i32 0 %t2 = extractelement <2 x i64> undef, i32 1 - ; Vector inserts - inserting the first element should have a zero cost; all - ; other elements should have a cost of two. + ; Vector inserts - inserting elements should have a cost of two. 
; - ; CHECK: cost of 0 {{.*}} insertelement <2 x i64> poison, i64 undef, i32 0 + ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> poison, i64 undef, i32 0 ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> poison, i64 undef, i32 1 %t3 = insertelement <2 x i64> poison, i64 undef, i32 0 %t4 = insertelement <2 x i64> poison, i64 undef, i32 1 diff --git a/llvm/test/Analysis/CostModel/AArch64/kryo.ll b/llvm/test/Analysis/CostModel/AArch64/kryo.ll index 8dca867264951f..0ee72e10e9b3e9 100644 --- a/llvm/test/Analysis/CostModel/AArch64/kryo.ll +++ b/llvm/test/Analysis/CostModel/AArch64/kryo.ll @@ -6,45 +6,18 @@ target triple = "aarch64--linux-gnu" ; CHECK-LABEL: vectorInstrCost define void @vectorInstrCost() { - ; Vector extracts - extracting the first element should have a zero cost; - ; all other elements should have a cost of two. + ; Vector extracts - extracting elements should have a cost of two. ; - ; CHECK: cost of 0 {{.*}} extractelement <2 x i64> undef, i32 0 + ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 0 ; CHECK: cost of 2 {{.*}} extractelement <2 x i64> undef, i32 1 %t1 = extractelement <2 x i64> undef, i32 0 %t2 = extractelement <2 x i64> undef, i32 1 - ; Vector inserts - inserting the first element should have a zero cost; all - ; other elements should have a cost of two. + ; Vector inserts - inserting elements should have a cost of two. ; - ; CHECK: cost of 0 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 0 + ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 0 ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 1 %t3 = insertelement <2 x i64> undef, i64 undef, i32 0 %t4 = insertelement <2 x i64> undef, i64 undef, i32 1 - ret void } - -; CHECK-LABEL: vectorInstrExtractCost -define i64 @vectorInstrExtractCost(<4 x i64> %vecreg) { - - ; Vector extracts - extracting each element at index 0 is considered - ; free in the current implementation. 
When extracting element at index - ; 2, 2 is rounded to 0, so extracting element at index 2 has cost 0 as - ; well. - ; - ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 1 - ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 2 - %t1 = extractelement <4 x i64> %vecreg, i32 1 - %t2 = extractelement <4 x i64> %vecreg, i32 2 - %ele = add i64 %t2, 1 - %cond = icmp eq i64 %t1, %ele - - ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 0 - ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 3 - %t0 = extractelement <4 x i64> %vecreg, i32 0 - %t3 = extractelement <4 x i64> %vecreg, i32 3 - %val = select i1 %cond, i64 %t0 , i64 %t3 - - ret i64 %val -} diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll b/llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll index 5a3da82c8c0c1c..ad79609b946607 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-insert-extract.ll @@ -9,10 +9,10 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @ins_el0() #0 { ; CHECK-DEFAULT-LABEL: 'ins_el0' -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = insertelement zeroinitializer, i8 0, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = insertelement zeroinitializer, i16 0, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = insertelement zeroinitializer, i32 0, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = insertelement zeroinitializer, i64 0, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v0 = insertelement zeroinitializer, i8 0, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v1 = insertelement zeroinitializer, i16 0, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %v2 = insertelement zeroinitializer, i32 0, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3 = insertelement zeroinitializer, i64 0, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4 = insertelement zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5 = insertelement zeroinitializer, double 0.000000e+00, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -27,10 +27,10 @@ define void @ins_el0() #0 { ; CHECK-LOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-HIGH-LABEL: 'ins_el0' -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = insertelement zeroinitializer, i8 0, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = insertelement zeroinitializer, i16 0, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = insertelement zeroinitializer, i32 0, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = insertelement zeroinitializer, i64 0, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v0 = insertelement zeroinitializer, i8 0, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v1 = insertelement zeroinitializer, i16 0, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v2 = insertelement zeroinitializer, i32 0, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v3 = insertelement zeroinitializer, i64 0, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4 = insertelement zeroinitializer, float 0.000000e+00, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: %v5 = insertelement zeroinitializer, double 0.000000e+00, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -84,10 +84,10 @@ define void @ins_el1() #0 { define void @ext_el0() #0 { ; CHECK-DEFAULT-LABEL: 'ext_el0' -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = extractelement zeroinitializer, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = extractelement zeroinitializer, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = extractelement zeroinitializer, i64 0 -; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = extractelement zeroinitializer, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v0 = extractelement zeroinitializer, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v1 = extractelement zeroinitializer, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = extractelement zeroinitializer, i64 0 +; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3 = extractelement zeroinitializer, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4 = extractelement zeroinitializer, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5 = extractelement zeroinitializer, i64 0 ; CHECK-DEFAULT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -102,10 +102,10 @@ define void @ext_el0() #0 { ; CHECK-LOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-HIGH-LABEL: 'ext_el0' -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v0 = extractelement zeroinitializer, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v1 = 
extractelement zeroinitializer, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2 = extractelement zeroinitializer, i64 0 -; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v3 = extractelement zeroinitializer, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v0 = extractelement zeroinitializer, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v1 = extractelement zeroinitializer, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v2 = extractelement zeroinitializer, i64 0 +; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 100000 for instruction: %v3 = extractelement zeroinitializer, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4 = extractelement zeroinitializer, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v5 = extractelement zeroinitializer, i64 0 ; CHECK-HIGH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void diff --git a/llvm/test/Transforms/LICM/AArch64/extract-element.ll b/llvm/test/Transforms/LICM/AArch64/extract-element.ll index b156b81d6708d2..4d36edc75bea74 100644 --- a/llvm/test/Transforms/LICM/AArch64/extract-element.ll +++ b/llvm/test/Transforms/LICM/AArch64/extract-element.ll @@ -18,24 +18,23 @@ define i1 @func(ptr %0, i64 %1) { ; CHECK-NEXT: [[TMP12]] = add i64 [[TMP4]], 1 ; CHECK-NEXT: br label [[TMP3]] ; CHECK: .split.loop.exit: -; CHECK-NEXT: [[DOTLCSSA7:%.*]] = phi <1 x i64> [ [[TMP8]], [[TMP6]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP9]], [[TMP6]] ] ; CHECK-NEXT: [[DOTLCSSA6:%.*]] = phi i64 [ [[TMP4]], [[TMP6]] ] ; CHECK-NEXT: [[DOTPH:%.*]] = phi i1 [ [[TMP5]], [[TMP6]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[DOTLCSSA7]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], -1 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 
[[TMP14]], [[DOTLCSSA6]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp uge i64 [[TMP15]], [[TMP1]] -; CHECK-NEXT: br label [[TMP17:%.*]] +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[DOTLCSSA]], -1 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], [[DOTLCSSA6]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[TMP14]], [[TMP1]] +; CHECK-NEXT: br label [[TMP16:%.*]] ; CHECK: .split.loop.exit2: ; CHECK-NEXT: [[DOTPH3:%.*]] = phi i1 [ [[TMP5]], [[TMP3]] ] ; CHECK-NEXT: [[DOTPH4:%.*]] = phi i1 [ undef, [[TMP3]] ] -; CHECK-NEXT: br label [[TMP17]] -; CHECK: 17: -; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ [[DOTPH]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH3]], [[DOTSPLIT_LOOP_EXIT2]] ] -; CHECK-NEXT: [[TMP19:%.*]] = phi i1 [ [[TMP16]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH4]], [[DOTSPLIT_LOOP_EXIT2]] ] -; CHECK-NEXT: [[TMP20:%.*]] = xor i1 [[TMP18]], true -; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i1 true, i1 [[TMP19]] -; CHECK-NEXT: ret i1 [[TMP21]] +; CHECK-NEXT: br label [[TMP16]] +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ [[DOTPH]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH3]], [[DOTSPLIT_LOOP_EXIT2]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ [[TMP15]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH4]], [[DOTSPLIT_LOOP_EXIT2]] ] +; CHECK-NEXT: [[TMP19:%.*]] = xor i1 [[TMP17]], true +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i1 true, i1 [[TMP18]] +; CHECK-NEXT: ret i1 [[TMP20]] ; br label %3